In [1]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp.magics
import geopandas as gpd
import intake
import pandas as pd

from IPython.display import display, Markdown, HTML

import parallel_corridors_utils
import deploy_portfolio_yaml
from shared_utils import styleguide, geography_utils
from make_stripplot_data import diff_cutoffs

# Open the intake catalog from the YAML file(s) in the current directory
catalog = intake.open_catalog("./*.yml")
# Use Altair's "html" renderer for the charts displayed in this notebook
alt.renderers.enable("html")

In [2]:
# parameters cell
# Default operator (ITP ID); the "# Parameters" cell that follows reassigns
# itp_id for the operator actually being reported on.
itp_id = 182

In [3]:
# Parameters
# Values for this run: itp_id here replaces the default set in the cell above,
# and `district` is reassigned later from df.caltrans_district.
district = "01 - Eureka"
itp_id = 159


In [4]:
# Read the route variability table and keep only this operator's rows
# that have a route_group assigned
df = (
    catalog.competitive_route_variability.read()
    .loc[lambda d: (d.calitp_itp_id == itp_id) & d.route_group.notna()]
    .rename(columns={"route_name_used": "route_name"})
    .reset_index(drop=True)
)

# For ITP ID 182 only: remove '-13153' from route_id and store it as an integer
if itp_id == 182:
    df = df.assign(
        route_id=df.route_id.str.replace('-13153', '').astype(int)
    )

In [5]:
%%capture_parameters
# Operator name and district pieces are all taken from the first row of df.
operator_name = df.name.iloc[0]
# caltrans_district is split on '-' (e.g. "01 - Eureka"): the first piece is
# the district number; the second piece is lower-cased with spaces replaced by
# hyphens (both are used later to build the RT speedmap URL).
district_number = f"{df.caltrans_district.iloc[0].split('-')[0].strip()}"
district_name = f"{df.caltrans_district.iloc[0].split('-')[1].strip().lower().replace(' ', '-')}"

# NOTE(review): presumably the %%capture_parameters magic records the names in
# this final expression -- confirm against calitp.magics before reformatting it.
itp_id, operator_name, district_number, district_name

# Lake Transit Authority (ITP ID: 159)

In [6]:
# Page title, rendered as raw HTML
display(HTML("<h1>Competitive & Parallel Routes</h1>")) 

In [7]:
# Thresholds shared by the narrative stats and the route selection below
PCT_COMPETITIVE_THRESHOLD = 0.75
PCT_TRIPS_BELOW_CUTOFF = 0.50

# Narrative text inputs: district label and the analysis date as MM-DD-YYYY
district = df.caltrans_district.iloc[0]
formatted_date = pd.to_datetime(
    parallel_corridors_utils.SELECTED_DATE
).strftime('%m-%d-%Y')

stats = parallel_corridors_utils.operator_parallel_competitive_stats(
    itp_id, PCT_COMPETITIVE_THRESHOLD, PCT_TRIPS_BELOW_CUTOFF
)

# Each category's count as a percent of num_routes, rounded to 1 decimal
pct_parallel, pct_competitive, pct_viable = (
    round(stats[category] / stats['num_routes'] * 100, 1)
    for category in (
        'parallel_routes', 'competitive_routes', 'viable_competitive_routes'
    )
)

In [8]:
# A table of route-level stats is displayed for each route_group
# (route_name is shown in the table; displaying it on the chart makes the chart too crowded)

# Compute route stats once, for the entire df; style_route_stats() subsets it later
route_stats = parallel_corridors_utils.competitive_route_level_stats(df)
    
def style_route_stats(df, top15_routes):
    """
    Display a styled HTML table of route-level stats for selected routes.

    df: route-level stats; must contain route_id plus the sort / drop
        columns listed below.
    top15_routes: collection of route_id values to keep in the table.

    Nothing is returned; an "Route Stats" heading and the table are displayed.
    """
    freq_cols = [
        'Daily Avg Freq (trips per hr)',
        'PM Peak Avg Freq (trips per hr)',
    ]

    # Display names for the columns
    display_names = {
        "route_id": "Route ID",
        "route_name": "Route Name",
        "route_group": "Route Group",
        "num_trips": "# trips",
        "daily_avg_freq": "Daily Avg Freq (trips per hr)",
        "pm_peak_freq": "PM Peak Avg Freq (trips per hr)",
        "percentiles": "25th, 50th, 75th ptile (hrs)",
    }

    # Columns used for sorting / filtering but not shown
    hidden_cols = [
        "calitp_itp_id", "below_cutoff", "route_group",
        "pct_trips_competitive", "pct_below_cutoff",
        "p25", "p50", "p75",
    ]

    # Sort column -> ascending flag
    sort_spec = {
        "pct_trips_competitive": False,
        "below_cutoff": False,
        "pct_below_cutoff": False,
        "route_id": True,
    }

    table = (
        df[df.route_id.isin(top15_routes)]
        .sort_values(list(sort_spec), ascending=list(sort_spec.values()))
        .drop(columns=hidden_cols)
        .rename(columns=display_names)
    )

    # Change alignment for some columns
    # https://stackoverflow.com/questions/59453091/left-align-the-first-column-and-center-align-the-other-columns-in-a-pandas-table
    styler = table.style.format(formatter='{:,.3}', subset=freq_cols)
    styler = styler.set_properties(
        subset=['Route ID', 'Route Name'], **{'text-align': 'left'}
    )
    styler = styler.set_properties(
        subset=['# trips'] + freq_cols, **{'text-align': 'center'}
    )
    styler = styler.set_table_styles(
        [{"selector": 'th', "props": [('text-align', 'center')]}]
    )
    table_html = styler.hide(axis="index").to_html()

    display(HTML("<h4>Route Stats</h4>"))
    display(HTML(table_html))

## Quick Stats

In [9]:
# One entry per line of the summary; entries are joined with <br> for display
quick_stats = [
    f"**Bus routes in service: {stats['num_routes']}**",
    (
        "**Parallel routes** to State Highway Network (SHN): "
        f"**{stats['parallel_routes']} routes ({pct_parallel}%)**"
    ),
    (
        "**Competitive routes** against car travel (within 2x car travel time): "
        f"**{stats['competitive_routes']} routes ({pct_competitive}%)**"
    ),
    (
        "**Viable competitive routes** against car travel "
        "(within 2x car travel time and within 20, 30, or 40 min cut-off): "
        f"**{stats['viable_competitive_routes']} routes ({pct_viable}%)**"
    ),
]

display(Markdown("<br>".join(quick_stats)))

**Bus routes in service: 9**<br>**Parallel routes** to State Highway Network (SHN): **9 routes (100.0%)**<br>**Competitive routes** against car travel (within 2x car travel time): **8 routes (88.9%)**<br>**Viable competitive routes** against car travel (within 2x car travel time and within 20, 30, or 40 min cut-off): **6 routes (66.7%)**

In [10]:
def set_yaxis_range(df, y_col):
    """Return (min, max) of column `y_col` in `df`, for use as a y-axis range."""
    values = df[y_col]
    return values.min(), values.max()

# Rather than set it according to the operator, which can vary,
# just use set thresholds for each group
def top15_routes(df, route_group, pct_trips_below_cutoff=None, max_routes=15):
    """
    Pick the routes to plot for one route_group.

    Keeps rows in `route_group` whose pct_below_cutoff is at least the
    threshold, sorts them by calitp_itp_id (ascending), below_cutoff and
    pct_below_cutoff (descending), then route_id (ascending), de-duplicates
    on (calitp_itp_id, route_id) and returns the first `max_routes` route_ids.

    df: frame with calitp_itp_id, route_id, route_group, below_cutoff and
        pct_below_cutoff columns.
    route_group: value of the route_group column to keep.
    pct_trips_below_cutoff: minimum pct_below_cutoff for a route to qualify;
        defaults to the notebook-level PCT_TRIPS_BELOW_CUTOFF.
    max_routes: maximum number of routes returned (fewer if fewer qualify).

    Returns a list of route_id values (possibly empty).
    """
    if pct_trips_below_cutoff is None:
        # Same threshold the notebook uses everywhere else
        pct_trips_below_cutoff = PCT_TRIPS_BELOW_CUTOFF

    in_group = df.route_group == route_group
    mostly_below_cutoff = df.pct_below_cutoff >= pct_trips_below_cutoff

    ranked_routes = (
        df[in_group & mostly_below_cutoff]
        .sort_values(
            ["calitp_itp_id", "below_cutoff", "pct_below_cutoff", "route_id"],
            ascending=[True, False, False, True],
        )
        # df can hold several rows per route; keep the first after sorting
        .drop_duplicates(subset=["calitp_itp_id", "route_id"])
        .head(max_routes)
    )

    return list(ranked_routes.route_id)

In [11]:
def caption_with_cutoff(cutoff):
    """
    Build the markdown caption shown above a route group's charts.

    cutoff: additional travel time (in minutes) quoted in the caption.
    The percentage quoted comes from PCT_TRIPS_BELOW_CUTOFF.
    """
    min_share_pct = int(PCT_TRIPS_BELOW_CUTOFF*100)

    selection_note = (
        f"<br>These are routes that have **at least {min_share_pct}% "
        "of their trips take no more than an "
        f"additional {cutoff} min** compared to a car "
        "(up to 15 routes) are shown."
    )
    sorting_note = (
        "<br>Within each route group, routes are sorted in descending order "
        "by % competitive trips and # trips below the additional time cut-off."
    )

    return selection_note + sorting_note

# One caption per route group, each quoting that group's entry in diff_cutoffs
CAPTIONS_DICT = {
    group: caption_with_cutoff(diff_cutoffs[group])
    for group in ("short", "medium", "long")
}

In [12]:
def setup_charts(df, PCT_COMPETITIVE_THRESHOLD, route_group):
    """
    Select the routes to chart for one route_group and build both stripplots.

    df: trip-level frame with pct_trips_competitive, geometry, route_id,
        bus_multiplier and bus_difference columns.
    PCT_COMPETITIVE_THRESHOLD: routes must have pct_trips_competitive strictly
        above this value to be charted.
    route_group: route group passed on to top15_routes().

    Returns (subset_routes, multiplier_chart, difference_chart).
    """
    # Keep only the most competitive routes; geometry is not needed for charts
    is_competitive = df.pct_trips_competitive > PCT_COMPETITIVE_THRESHOLD
    plot_me = df[is_competitive].drop(columns="geometry")

    # y-ranges come from all competitive routes (not just this route_group),
    # so charts share the same axis across route_groups
    y_ranges = {
        y_col: set_yaxis_range(plot_me, y_col)
        for y_col in ("bus_multiplier", "bus_difference")
    }

    # Routes to plot for this route_group
    subset_routes = top15_routes(plot_me, route_group)

    def _stripplot(y_col):
        # Stripplot of y_col for the selected routes, on the shared y-range
        y_min, y_max = y_ranges[y_col]
        return parallel_corridors_utils.make_stripplot(
            plot_me[plot_me.route_id.isin(subset_routes)],
            y_col, Y_MIN=y_min, Y_MAX=y_max,
        )

    multiplier_chart = _stripplot("bus_multiplier")
    difference_chart = _stripplot("bus_difference")

    return subset_routes, multiplier_chart, difference_chart

In [13]:
def display_charts_for_route_group(df, route_group):
    """
    Display the caption, both stripplots and the route stats table for one
    route_group.

    df: trip-level frame passed to setup_charts().
    route_group: key into CAPTIONS_DICT ("short", "medium" or "long").

    If no routes qualify, a one-line message is printed instead, so the
    section is never left blank. Returns None.
    """
    subset_routes, mult, diff = setup_charts(
        df, PCT_COMPETITIVE_THRESHOLD, route_group)

    if not subset_routes:
        # Say so explicitly rather than relying on a downstream helper to
        # raise on empty data
        print("No routes meet this criteria.")
        return

    display(Markdown(CAPTIONS_DICT[route_group]))

    display(mult)
    display(diff)

    # Display route stats for just plotted routes
    style_route_stats(route_stats, subset_routes)

## Viable Competitive Routes
### Short Routes (< 1 hr)

In [14]:
# An operator may have no routes in some route_groups, but all 3 route-group
# sections must still be displayed.
# Jupyterbook will not like markdown with f-string, so route_group is set
# separately in each cell and try/except is used around the display call;
# when no routes are returned, a sentence saying so is printed instead.
route_group = "short"
try:
    display_charts_for_route_group(df, route_group)
except Exception:
    # Exception (not a bare except) so KeyboardInterrupt / SystemExit propagate
    print("No routes meet this criteria.")

<br>These are routes that have **at least 50% of their trips take no more than an additional 20 min** compared to a car (up to 15 routes) are shown.<br>Within each route group, routes are sorted in descending order by % competitive trips and # trips below the additional time cut-off.

Route ID,Route Name,# trips,Daily Avg Freq (trips per hr),"25th, 50th, 75th ptile (hrs)",PM Peak Avg Freq (trips per hr)
2042,"Southshore, Clearlake To Lakeport",14,0.58,"0.82, 0.82, 0.92",1.0


### Medium Routes (1-1.5 hrs)

In [15]:
route_group = "medium"
try:
    display_charts_for_route_group(df, route_group)
except Exception:
    # Exception (not a bare except) so KeyboardInterrupt / SystemExit propagate
    print("No routes meet this criteria.")

<br>These are routes that have **at least 50% of their trips take no more than an additional 30 min** compared to a car (up to 15 routes) are shown.<br>Within each route group, routes are sorted in descending order by % competitive trips and # trips below the additional time cut-off.

Route ID,Route Name,# trips,Daily Avg Freq (trips per hr),"25th, 50th, 75th ptile (hrs)",PM Peak Avg Freq (trips per hr)
2017,"Northshore, Clearlake To Lakeport",21,0.88,"1.27, 1.33, 1.33",1.0
2041,Lakeport – Ukiah,6,0.25,"1.43, 1.43, 1.43",0.33
2018,"Soda Bay, Kit’S Corner To Lakeport",6,0.25,"0.86, 0.95, 0.97",0.33
2033,"Highway 29, Clearlake To Deer Park",8,0.33,"1.15, 1.3, 1.44",0.33
2031,"Highway 175, Kit’S Corner To Middletown",7,0.29,"0.63, 0.63, 0.77",


### Long Routes (> 1.5 hrs)

In [16]:
route_group = "long"
try:
    display_charts_for_route_group(df, route_group)
except Exception:
    # Exception (not a bare except) so KeyboardInterrupt / SystemExit propagate
    print("No routes meet this criteria.")

No routes meet this criteria.


### GTFS Real-Time Speedmaps

In [17]:
RT_SITE_YAML = "../portfolio/sites/rt.yml"

# Grab list of ITP IDs with GTFS RT speedmaps
rt_itp_id = deploy_portfolio_yaml.check_if_rt_data_available(RT_SITE_YAML)

# Construct the URL from the district number / name captured earlier and itp_id
RT_URL = ("https://analysis.calitp.org/rt/"
          f"district_{district_number}-{district_name}/"
          f"speedmaps__itp_id_{itp_id}.html"
         )

# If this ITP ID is found in RT analysis, give URL to that operator in analysis.calitp.org
if itp_id in rt_itp_id:
    # href is double-quoted so the link stays well-formed whatever the URL contains
    display(HTML(f'<a href="{RT_URL}">GTFS RT speedmaps here.</a>'))
else:
    display(HTML("No GTFS RT data available."))