In [1]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp.magics
import geopandas as gpd
import intake
import pandas as pd

from IPython.display import display, Markdown, HTML

import parallel_corridors_utils
from shared_utils import styleguide

# Open every intake catalog YAML file found in the current working directory.
catalog = intake.open_catalog("./*.yml")
# Render Altair charts through the HTML renderer.
alt.renderers.enable("html")

In [2]:
# parameters cell
# Default operator (ITP ID) this report is generated for.
# NOTE(review): presumably overridden per run by an injected parameters cell - confirm.
itp_id = 182

In [3]:
# Parameters
# NOTE(review): `district` is reassigned further down from df.caltrans_district,
# so the value set here is not the one used by later cells.
district = 7
itp_id = 182


In [4]:
# Load the trip-level competitive route table from the intake catalog.
df = catalog.competitive_route_variability.read()

# Keep only this operator's rows that were assigned a route_group.
df = (df[(df.calitp_itp_id == itp_id) & 
         (df.route_group.notna())]
      .reset_index(drop=True)
     )


# Special case for ITP ID 182: strip the '-13153' text from route_id so the
# remainder can be cast to int.
# regex=False forces a literal match regardless of the pandas version
# (the default for `regex` changed between pandas 1.x and 2.x).
if itp_id == 182:
    df = df.assign(
        route_id = df.route_id.str.replace('-13153', '', regex=False).astype(int)
    )

In [5]:
%%capture_parameters
# df was filtered to a single calitp_itp_id above, so the operator name is
# read from the first row.
operator_name = df.name.iloc[0]

# NOTE(review): presumably the capture_parameters cell magic records this
# tuple's values for use by later cells - confirm against calitp.magics.
itp_id, operator_name

# Los Angeles County Metropolitan Transportation Authority (ITP ID: 182)

In [6]:
# Report title, rendered as a raw HTML heading.
display(HTML("<h1>Competitive & Parallel Routes</h1>")) 

In [7]:
# Grab parameters for narrative text - clean up formatting here 
# District comes from the first row of the operator's data (this replaces any
# `district` value set in an earlier cell).
district = df.caltrans_district.iloc[0]
# Analysis date formatted as MM-DD-YYYY.
# NOTE(review): formatted_date is not referenced in the cells visible here - confirm it is still needed.
formatted_date = pd.to_datetime(parallel_corridors_utils.SELECTED_DATE).strftime('%m-%d-%Y')

# Threshold passed to the stats helper below and later to generate_report.
PCT_COMPETITIVE_THRESHOLD = 0.75

# The code below reads the keys 'num_routes', 'parallel_routes' and
# 'competitive_routes' from the returned object.
stats = parallel_corridors_utils.operator_parallel_competitive_stats(
    itp_id, PCT_COMPETITIVE_THRESHOLD)

# Share of the operator's routes in each category, as a percentage rounded to 1 decimal.
pct_parallel = round(stats['parallel_routes'] / stats['num_routes'] * 100,1 )
pct_competitive =  round(stats['competitive_routes'] / stats['num_routes'] * 100,1 )

In [8]:
# Summary line for the operator: total routes, plus the count and percentage
# of parallel and competitive routes (values come from `stats`, `pct_parallel`
# and `pct_competitive`).
display(
    Markdown(
        f"**Bus routes in service**: {stats['num_routes']} "
        "<br>**Parallel routes** to State Highway Network (SHN): "
        f"{stats['parallel_routes']} routes ({pct_parallel}%)"
        f"<br>**Competitive routes** against car travel: {stats['competitive_routes']} routes ({pct_competitive}%)"
    )
)

**Bus routes in service**: 113 <br>**Parallel routes** to State Highway Network (SHN): 73 routes (64.6%)<br>**Competitive routes** against car travel: 50 routes (44.2%)

In [9]:
def set_yaxis_range(df, y_col):
    """
    Return the smallest and largest value of one column as a (min, max) tuple.

    df: table holding the column.
    y_col: name of the column whose range is wanted.
    """
    column_values = df[y_col]

    return column_values.min(), column_values.max()

# Rather than set it according to the operator, which can vary,
# just use set thresholds for each group.
# Maximum additional bus travel time (minutes, compared to a car) per route group.
diff_cutoffs = {
    "short": 20,
    "medium": 30,
    "long": 40,
}

# Minimum share of a route's trips that must fall at or below the cut-off
# for the route to be shown.
PCT_TRIPS_BELOW_CUTOFF = 0.25

def top15_routes(df, route_group):
    """
    Pick up to 15 route_ids in one route group to display.

    A route qualifies when at least PCT_TRIPS_BELOW_CUTOFF of its trips have
    bus_difference <= diff_cutoffs[route_group]. Qualifying routes are ranked by
    number of trips below the cut-off (descending), then share of trips below
    the cut-off (descending), then route_id (ascending).

    df: trip-level table with columns calitp_itp_id, route_id, route_group,
        trip_id and bus_difference.
    route_group: key of diff_cutoffs ("short", "medium" or "long").

    Returns a list of route_id values (at most 15); an empty list when the
    route group has no rows or no route meets the threshold.
    Raises KeyError when route_group has rows but no entry in diff_cutoffs.
    """
    route_cols = ["calitp_itp_id", "route_id"]

    group_trips = df[df.route_group == route_group]

    # Operators do not necessarily have routes in every group.
    if group_trips.empty:
        return []

    cutoff = diff_cutoffs[route_group]

    # Flag each trip with a vectorized comparison (missing differences count
    # as not below the cut-off), and count trips per route.
    group_trips = group_trips.assign(
        below_cutoff = (group_trips.bus_difference <= cutoff).astype(int),
        num_trips = group_trips.groupby(route_cols)["trip_id"].transform("count"),
    )

    # Turn the per-trip flag into a per-route count, then into a share.
    group_trips["below_cutoff"] = (
        group_trips.groupby(route_cols)["below_cutoff"].transform("sum")
    )
    group_trips["pct_below_cutoff"] = group_trips.below_cutoff.divide(
        group_trips.num_trips)

    # Keep routes where at least PCT_TRIPS_BELOW_CUTOFF of trips are below the
    # cut-off; show 15 max, otherwise show fewer, since the rest won't be as
    # interesting to operators.
    top_routes = (group_trips[group_trips.pct_below_cutoff >= PCT_TRIPS_BELOW_CUTOFF]
           .sort_values(["calitp_itp_id", "below_cutoff", 
                         "pct_below_cutoff", "route_id"],
                        ascending = [True, False, False, True]
                       )
           .drop_duplicates(subset=route_cols)
          ).head(15)
    
    return list(top_routes.route_id)

In [10]:
# Caption shown above each route group's charts. All three share one sentence
# template; only the travel-time heading and the minute cut-off differ.
CAPTIONS_DICT = {
    group: (
        f"{travel_time_heading}"
        f"<br>These are routes that have at least {int(PCT_TRIPS_BELOW_CUTOFF*100)}% of their trips "
        f"take no more than an additional {diff_cutoffs[group]} min compared to a car "
        "(up to 15 routes) are shown."
    )
    for group, travel_time_heading in [
        ("short", "Short routes travel time: <= 1 hr"),
        ("medium", "Medium routes travel time: 1-1.5 hrs"),
        ("long", "Long routes travel time: > 1.5 hrs"),
    ]
}

# Individual names kept for any cell that refers to a caption directly.
short_caption = CAPTIONS_DICT["short"]
med_caption = CAPTIONS_DICT["medium"]
long_caption = CAPTIONS_DICT["long"]

In [11]:
def generate_report(df, PCT_COMPETITIVE_THRESHOLD):
    """
    Display strip plots for the operator's most competitive routes,
    one section (anchor, heading, caption, two charts) per route group.

    df: trip-level table; this function reads pct_trips_competitive,
        route_group, route_id, bus_multiplier and bus_difference, and drops
        the geometry column before charting.
    PCT_COMPETITIVE_THRESHOLD: only rows whose pct_trips_competitive is
        strictly greater than this value are charted.

    Returns None; everything is emitted through display() / print().
    A route group with no rows prints a "No routes" message. A route group
    whose charts cannot be built prints the error and is skipped, so one
    failing group does not stop the rest of the report.
    """
    # Set up df for charting (cut-off at some threshold to show most competitive routes)
    plot_me = (df[df.pct_trips_competitive > PCT_COMPETITIVE_THRESHOLD]
           .drop(columns = "geometry")
    )
    
    # Axis ranges are taken across all route groups so charts share a scale.
    y_col1 = "bus_multiplier"
    Y_MIN1, Y_MAX1 = set_yaxis_range(plot_me, y_col1)

    y_col2 = "bus_difference"
    Y_MIN2, Y_MAX2 = set_yaxis_range(plot_me, y_col2)
    
    def combine_stripplots(df):
        """Build the multiplier chart and the difference chart for one subset."""
        multiplier_chart = parallel_corridors_utils.make_stripplot(
            df, y_col1, Y_MIN = Y_MIN1, Y_MAX = Y_MAX1
        )

        difference_chart = parallel_corridors_utils.make_stripplot(
            df, y_col2, Y_MIN = Y_MIN2, Y_MAX = Y_MAX2
        )
            
        return multiplier_chart, difference_chart
    
    for r in ["short", "medium", "long"]:
        # Allow for possibility that operator doesn't have routes that fall
        # into certain route_groups: check explicitly instead of relying on
        # an exception to signal it.
        if plot_me[plot_me.route_group == r].empty:
            print(f"No routes in the {r} route_group")
            continue

        try:
            subset_routes = top15_routes(plot_me, r)

            # Routes exist, but none meet the cut-off criteria: show nothing.
            if len(subset_routes) == 0:
                continue

            mult, diff = combine_stripplots(
                plot_me[plot_me.route_id.isin(subset_routes)])
        except Exception as err:
            # Keep the report going, but say what actually went wrong rather
            # than mislabelling the failure as "no routes".
            print(f"Could not chart the {r} route_group: {err!r}")
            continue

        # Set an id anchor
        display(HTML(f"<a id={r}></a>"))        

        display(HTML(f"<h3>{r.title()} Routes</h3>"))
        display(
            Markdown(
                f"{CAPTIONS_DICT[r]}"
                # Line break so the sort note does not run into the caption.
                "<br>Within each route group, routes are sorted in descending order "
                "by % competitive trips and # trips below the additional time cut-off."
            )
        )

        display(mult)
        display(diff)

In [12]:
# Render the short / medium / long route sections for this operator.
generate_report(df, PCT_COMPETITIVE_THRESHOLD)

Short routes travel time: <= 1 hr<br>These are routes that have at least 25% of their trips take no more than an additional 20 min compared to a car (up to 15 routes) are shown.Within each route group, routes are sorted in descending order by % competitive trips and # trips below the additional time cut-off.

Medium routes travel time: 1-1.5 hrs<br>These are routes that have at least 25% of their trips take no more than an additional 30 min compared to a car (up to 15 routes) are shown.Within each route group, routes are sorted in descending order by % competitive trips and # trips below the additional time cut-off.

Long routes travel time: > 1.5 hrs<br>These are routes that have at least 25% of their trips take no more than an additional 40 min compared to a car (up to 15 routes) are shown.Within each route group, routes are sorted in descending order by % competitive trips and # trips below the additional time cut-off.