In [None]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import branca
import calitp.magics
import geopandas as gpd
import intake
import pandas as pd

from IPython.display import display, Markdown, HTML

import parallel_corridors_utils
import deploy_portfolio_yaml
from shared_utils import styleguide

# Open the intake catalog described by the YAML file(s) matching ./*.yml
catalog = intake.open_catalog("./*.yml")
# Render Altair charts through the "html" renderer
alt.renderers.enable("html")
# max_rows=None lifts Altair's row limit on chart data
alt.data_transformers.enable('default', max_rows=None)

In [None]:
# parameters cell
# NOTE(review): presumably overridden per operator when the notebook is
# executed as a parameterized report; 182 is the default -- confirm
itp_id = 182

In [None]:
# Load the route-level table and keep only the rows for this operator
df = catalog.competitive_route_variability.read()

df = (df[df.calitp_itp_id == itp_id]
      .rename(columns = {"route_name_used": "route_name"})
      .reset_index(drop=True)
     )


# Special case for ITP ID 182: keep only the part of route_id
# that precedes the first "-131"
if itp_id == 182:
    df = df.assign(
        route_id = df.route_id.str.split('-131', expand=True)[0],
    )

# Use route_short_name instead
# But where route_short_name is missing, use route_id
# fillna treats both None and NaN as missing (an `is None` check skips NaN)
# and, unlike a row-wise apply, also works on an empty frame.
# Both new columns are computed from the original route_short_name.
df = df.assign(
    route_id2 = df.route_short_name.fillna(df.route_id),
    route_short_name = df.route_short_name.fillna(df.route_long_name),
)

In [None]:
%%capture_parameters
# The district label is split once on '-' into a number part and a name part
_district_parts = df.caltrans_district.iloc[0].split('-')

operator_name = df.calitp_agency_name.iloc[0]
district_number = _district_parts[0].strip()
# Name part is lower-cased with spaces replaced by hyphens
district_name = _district_parts[1].strip().lower().replace(' ', '-')

# NOTE(review): keep this tuple as the last line of the cell; the cell magic presumably reads it -- confirm
itp_id, operator_name, district_number, district_name

# {operator_name}

In [None]:
# Emit the page title as raw HTML from a code cell
display(HTML("<h1>Competitive Routes near SHN</h1>")) 

In [None]:
# Values used by the narrative text below
district = df.caltrans_district.iloc[0]
formatted_date = pd.to_datetime(
    parallel_corridors_utils.ANALYSIS_DATE
).strftime('%m-%d-%Y')

PCT_COMPETITIVE_THRESHOLD = parallel_corridors_utils.PCT_COMPETITIVE_THRESHOLD

# Operator-level route counts, keyed by name
stats = parallel_corridors_utils.operator_parallel_competitive_stats(
    itp_id, PCT_COMPETITIVE_THRESHOLD)

# Each count as a percentage of all routes, rounded to 1 decimal place
pct_parallel, pct_competitive, pct_competitive_and_parallel = (
    round(stats[count_key] / stats['num_routes'] * 100, 1)
    for count_key in (
        'on_shn_or_intersecting_routes',
        'competitive_routes',
        'competitive_routes_on_shn_or_intersecting',
    )
)

## Quick Stats

In [None]:
# One list entry per displayed line; entries are joined with <br>
display(
    Markdown(
        "<br>".join([
            f"**Bus routes in service: {stats['num_routes']}**",
            (
                "**Routes near** the State Highway Network (SHN): "
                f"**{stats['on_shn_or_intersecting_routes']} routes ({pct_parallel}%)**"
            ),
            (
                "**Competitive routes** against car travel "
                "(within 1.5x car travel time): "
                f"**{stats['competitive_routes']} routes ({pct_competitive}%)**"
            ),
            (
                "**Near SHN and competitive routes** against car travel "
                "(within 1.5x car travel time): "
                f"**{stats['competitive_routes_on_shn_or_intersecting']} routes ({pct_competitive_and_parallel}%)**"
            ),
        ])
    )
)

In [None]:
def set_yaxis_range(df, y_col):
    """
    Return the (minimum, maximum) of column `y_col` in `df`.
    """
    values = df[y_col]

    return values.min(), values.max()

# Rather than set it according to the operator, which can vary,
# just use set thresholds for each group
def top15_routes(df, route_group):
    """
    Pick up to 15 route_ids to plot for one route_group.

    Only rows in `route_group` whose category is "on_shn" or
    "intersects_shn" are considered. Rows are ordered by calitp_itp_id,
    then by pct_trips_competitive (highest first), then by route_id, and
    each (calitp_itp_id, route_id) pair is kept once.

    Returns a list of route_ids when at least 2 routes qualify,
    otherwise None.
    """
    in_group = df.route_group == route_group
    near_shn = df.category.isin(["on_shn", "intersects_shn"])

    ranked = df[in_group & near_shn].sort_values(
        ["calitp_itp_id", "pct_trips_competitive", "route_id"],
        ascending=[True, False, True],
    )
    # Show 15 at most; fewer is fine when fewer routes qualify
    top_routes = ranked.drop_duplicates(
        subset=["calitp_itp_id", "route_id"]
    ).head(15)

    # A single route (or none) is not worth showing
    if len(top_routes) <= 1:
        return None

    return list(top_routes.route_id)

In [None]:
def remove_datetime_columns(df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
    """
    Return `df` without its service date and trip departure columns.

    These columns trigger a "JSON not serializable" error in altair.
    They are listed by name because df.select_dtypes(exclude="datetime")
    would miss the ones stored as objects.
    """
    datetime_like_cols = [
        "service_date",
        "trip_first_departure",
        "trip_departure",
    ]

    return df.drop(columns=datetime_like_cols)

def remove_datetime_and_geom_columns(df: gpd.GeoDataFrame) -> pd.DataFrame:
    """
    Return `df` with the datetime-like columns and the geometry column dropped.
    """
    without_datetimes = remove_datetime_columns(df)

    return without_datetimes.drop(columns="geometry")

In [None]:
def setup_charts(df: pd.DataFrame, route_group: str) -> "tuple | None":
    """
    Choose the routes to plot for `route_group` and build their stripplot.

    Parameters
    ----------
    df : route-level table; must hold "bus_multiplier" plus the columns
        read by top15_routes.
    route_group : route group label passed through to top15_routes.

    Returns
    -------
    (subset_routes, multiplier_chart) when top15_routes yields more than
    one route, otherwise None.
    """
    # The lower y-bound comes from the whole frame passed in (not just this
    # route_group) so charts share a range across route_groups
    y_col1 = "bus_multiplier"
    Y_MIN1, _ = set_yaxis_range(df, y_col1)

    # Grab the routes to plot
    subset_routes = top15_routes(df, route_group)

    # Nothing to chart unless there are at least 2 routes
    if subset_routes is None or len(subset_routes) <= 1:
        return None

    # The upper y-bound is fixed at 2.5 rather than taken from the data
    multiplier_chart = parallel_corridors_utils.make_stripplot(
        df[df.route_id.isin(subset_routes)],
        y_col1, Y_MIN = Y_MIN1, Y_MAX = 2.5
    )

    return subset_routes, multiplier_chart


In [None]:
# Display a table of route-level stats for each route_group
# Displaying route_name makes chart too crowded    
def style_route_stats(df):
    """
    Render the route-level stats in `df` as a styled HTML table.

    The table is sorted by pct_trips_competitive (highest first) and then
    route_id2, helper columns are dropped, the remaining columns are given
    display names, and the result is shown under a "Route Stats" heading.
    Nothing is returned.
    """
    # Where route_short_name is missing, show route_long_name instead.
    # fillna covers both None and NaN; an `is None` check would skip NaN.
    df = df.assign(
        route_short_name = df.route_short_name.fillna(df.route_long_name)
    )

    # Rename columns for display
    rename_cols = {
        "route_id2": "Route ID",
        "route_short_name": "Route Name",
        "route_group": "Route Group",
        "num_trips": "# trips",
        "daily_avg_freq": "Daily Avg Freq (trips per hr)",
        "pm_peak_freq": "PM Peak Avg Freq (trips per hr)",
        "percentiles": "25th, 50th, 75th ptile (hrs)",
        "mean_speed_mph": "Avg Daily Speed (mph)",
    }

    # Columns used for sorting/filtering only, not shown in the table
    drop_cols = [
        "calitp_itp_id", "route_id", "route_group",
        "pct_trips_competitive",
        "p25", "p50", "p75",
        "category"
    ]

    # Display columns that get number formatting
    formatted_cols = [
        'Daily Avg Freq (trips per hr)',
        'PM Peak Avg Freq (trips per hr)',
        'Avg Daily Speed (mph)',
    ]
    left_aligned_cols = ['Route ID', 'Route Name']
    centered_cols = ['# trips'] + formatted_cols

    # Change alignment for some columns
    # https://stackoverflow.com/questions/59453091/left-align-the-first-column-and-center-align-the-other-columns-in-a-pandas-table
    df_style = (
        df.sort_values(
            ["pct_trips_competitive", "route_id2"],
            ascending=[False, True])
        .drop(columns = drop_cols)
        .rename(columns = rename_cols)
        .style.format(subset=formatted_cols, formatter='{:,.3}')
        .set_properties(subset=left_aligned_cols,
                        **{'text-align': 'left'})
        .set_properties(subset=centered_cols,
                        **{'text-align': 'center'})
        .set_table_styles([dict(selector='th',
                                props=[('text-align', 'center')])
                           ])
        .hide(axis="index")
        .to_html()
    )

    display(HTML("<h4>Route Stats</h4>"))
    display(HTML(df_style))

In [None]:
# An operator may not have routes in every route_group.
# JupyterBook does not handle markdown built from f-strings, so each
# route_group gets its own cell and the display logic decides what to show.
# All 3 route groups must still be displayed; when a group has nothing
# to show, a sentence explaining that no routes were returned is printed instead.
# The cut-off is "fewer than 2 routes" rather than "no routes", because
# we don't want to show just 1 route.

# Stepped red-yellow-green colormap scaled to the range 0-40;
# make_map uses it to color routes by mean_speed_mph
SPEED_COLORSCALE = branca.colormap.step.RdYlGn_11.scale(vmin=0, vmax=40)

def make_map(gdf: gpd.GeoDataFrame) -> None:
    """
    Display an interactive map of the routes in `gdf`.

    When every mean_speed_mph value is null, routes are colored
    categorically by route_id with the "tab20" colormap. Otherwise rows
    without a speed are dropped and routes are colored by mean_speed_mph
    using SPEED_COLORSCALE.

    The map is displayed; nothing is returned.
    """
    keep_cols = ["calitp_itp_id", "route_id",
                 "route_short_name", "route_long_name",
                 "bus_multiplier",
                 "pct_trips_competitive",
                 "mean_speed_mph", "geometry"
                ]

    routes = gdf[keep_cols]

    if routes.mean_speed_mph.isnull().all():
        # No speed data at all: fall back to one color per route
        plot_col = "route_id"
        is_categorical = True
        colorscale = "tab20"
    else:
        plot_col = "mean_speed_mph"
        routes = routes[routes.mean_speed_mph.notna()]
        is_categorical = False
        colorscale = SPEED_COLORSCALE

    m = routes.explore(plot_col, categorical = is_categorical,
                       cmap = colorscale,
                       tiles = "CartoDB Positron")

    display(m)

    
def display_charts_for_route_group(df, route_group):
    """
    Show the stripplot, route stats table and map for one route_group.

    Only rows whose pct_trips_competitive exceeds PCT_COMPETITIVE_THRESHOLD
    and whose category is "on_shn" or "intersects_shn" are considered.
    When setup_charts finds nothing to plot for `route_group`, a short
    message is printed instead.
    """
    is_competitive = df.pct_trips_competitive > PCT_COMPETITIVE_THRESHOLD
    near_shn = df.category.isin(["on_shn", "intersects_shn"])

    plot_me = remove_datetime_columns(df[is_competitive & near_shn])
    # The chart and the stats table take the data without geometry;
    # only the map needs it
    plot_me_no_geom = plot_me.drop(columns = "geometry")

    # Build the chart once and reuse the result for the None check
    chart_setup = setup_charts(plot_me_no_geom, route_group)

    if chart_setup is None:
        print("No routes meet this criteria.")
        return

    subset_routes, mult = chart_setup

    display(mult)

    # Display route stats for just plotted routes
    is_plotted = plot_me.route_id.isin(subset_routes)
    route_stats = parallel_corridors_utils.competitive_route_level_stats(
        plot_me_no_geom[is_plotted])

    style_route_stats(route_stats)

    make_map(plot_me[is_plotted])

## Competitive Routes
### Short Routes (< 1 hr)

In [None]:
# Chart, stats table and map for the "short" group,
# or a message when setup_charts finds nothing to plot
route_group = "short"
display_charts_for_route_group(df, route_group)

### Medium Routes (1-1.5 hrs)

In [None]:
# Chart, stats table and map for the "medium" group,
# or a message when setup_charts finds nothing to plot
route_group = "medium"
display_charts_for_route_group(df, route_group)

### Long Routes (> 1.5 hrs)

In [None]:
# Chart, stats table and map for the "long" group,
# or a message when setup_charts finds nothing to plot
route_group = "long"
display_charts_for_route_group(df, route_group)

### GTFS Real-Time Speedmaps

In [None]:
RT_SITE_YAML = "../portfolio/sites/rt.yml"

# Grab list of ITP IDs with GTFS RT speedmaps
rt_itp_id_dict = deploy_portfolio_yaml.check_if_rt_data_available(RT_SITE_YAML)


# Test membership on the mapping itself; no need to copy its keys into a list
if itp_id in rt_itp_id_dict:

    # Construct the URL 
    RT_URL = ("https://analysis.calitp.org/rt/"
              f"district_{district_number}-{district_name}/"
              f"{rt_itp_id_dict[itp_id]}__speedmaps__"
              f"district_{district_number}-{district_name}__"
              f"itp_id_{itp_id}.html"
             ) 

    # If this ITP ID is found in RT analysis, give URL to that operator in analysis.calitp.org
    # The href value is quoted so the link stays intact whatever characters the URL holds
    display(HTML(f'<a href="{RT_URL}">GTFS RT speedmaps here.</a>'))

else:
    display(HTML("No GTFS RT data available."))