# Variability in Trip Times for Competitive Routes

Of competitive trips, come up with some stats for narrative:
* x% of trips are at or below the 2x cut-off
* Operator has this many total routes (n), this many parallel (n, %), and this many competitive parallel (n, %)

Identify the 10 most promising corridors to support (overall, across operators), and keep that analysis in a separate notebook. In urban districts with a strong population signal where service could be provided, which are the 10 most promising corridors?

* `bus_multiplier` is confusing: does 0.5 mean the bus is twice as fast as the car? Maybe switch to the difference from car travel time (e.g., -0.5 hr or +0.25 hr), which may be easier to understand.
* If Hunter is OK with re-running the Google API, we can compute it relative to the AM peak or PM peak and select all trips within that period; then the bus multiplier is at least relative to the same time period, which is easier for stakeholders.
* sorting within operator...seems obvious to do it by ridership


In [None]:
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import pandas as pd

from IPython.display import display, Markdown, HTML

from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

# Register the styleguide theme under a name, then enable it.
# Registering alone only adds the theme to Altair's theme registry; charts
# keep using the currently active theme until the new one is enabled.
alt.themes.register("calitp_theme", styleguide.calitp_theme)
alt.themes.enable("calitp_theme")

In [None]:
# parameters cell
# ITP ID of the operator this notebook reports on; the cells below use it to
# filter the trips table and to label the charts.
# NOTE(review): presumably overridden per operator when the notebook is run
# with parameters -- confirm.
itp_id = 182

In [None]:
# Load the trip-level table used for the stripplots, then keep only this
# operator's trips that were assigned to a plot group.
df = pd.read_parquet("./data/stripplot_trips.parquet")

df = (df[(df.calitp_itp_id == itp_id) & 
         (df.plot_group.notna())]
      .reset_index(drop=True)
     )

# Fail early with a clear message instead of an opaque IndexError from
# `.iloc[0]` below when the operator has no plottable trips.
if df.empty:
    raise ValueError(
        f"No trips with a plot_group found for ITP ID {itp_id} in "
        "./data/stripplot_trips.parquet"
    )

#df = df.assign(
#    route = df.apply(lambda x: (x.route_id.replace('-13153', '') + 
#                                f"\n {x.pct_trips_competitive * 100}%"), axis=1)
#)

if itp_id == 182:
    # For this operator, strip the '-13153' suffix so route IDs can be cast
    # to integers. regex=False makes this a literal substring replacement
    # regardless of the pandas version's default for `regex`.
    df = df.assign(
        route_id = (df.route_id
                    .str.replace('-13153', '', regex=False)
                    .astype(int))
    )

# Operator-level values are read from the first remaining row.
# Bracket access is used for "name" so the column lookup can never be
# shadowed by a DataFrame attribute of the same name.
operator_name = df["name"].iloc[0]
district = df.caltrans_district.iloc[0]

In [None]:
# Notebook header: a fixed report title, followed by the operator's name and
# ITP ID rendered as an HTML heading.
report_title = Markdown("# Competitive Route Travel Time Variability")
display(report_title)

operator_heading = HTML(f"<h1>{operator_name} (ITP ID: {itp_id})</h1>")
display(operator_heading)

In [None]:
# How to designate p25, p50, p75, and fastest trip?
# Hex colors for chart annotations. DARK_GRAY colors the highlighted-point
# layer (specific_point) and the dashed cutoff rule in make_stripplot.
# LIGHT_GRAY is not referenced in the cells visible here.
DARK_GRAY = "#323434"
LIGHT_GRAY = "#797C7C"

def labeling(word):
    """Return a human-readable chart label for a column name.

    Column names with a custom label are mapped to it; any other name has
    its underscores replaced by spaces and is title-cased.
    """
    custom_labels = {
        "bus_multiplier": "Ratio of Bus to Car Travel Time",
        "bus_difference": "Difference in Bus to Car Travel Time (min)"
    }

    try:
        return custom_labels[word]
    except KeyError:
        # No custom label: derive one from the column name itself.
        return word.replace('_', ' ').title()

In [None]:
def specific_point(y_col):
    """Build a data-less point layer that plots `y_col` in dark gray.

    The chart carries no data and no filter of its own; the caller supplies
    both (e.g. by adding a transform_filter and faceting with `data=`).
    """
    y_encoding = alt.Y(f'{y_col}:Q')
    point_marks = alt.Chart().mark_point(size=20, opacity=0.4, strokeWidth=1.5)

    return point_marks.encode(y=y_encoding, color=alt.value(DARK_GRAY))

In [None]:
#https://altair-viz.github.io/gallery/stripplot.html
def make_stripplot(df, y_col="bus_multiplier", Y_MIN=0, Y_MAX=5):
    """Build a faceted, jittered strip plot of trips with one column per route.

    Layers, in drawing order: the jittered trip points colored by time of
    day, a dark-gray marker on rows where `p50_trip == 1`, a dashed
    horizontal cutoff rule, and a text label showing `pct_trips_competitive`
    on rows where `fastest_trip == 1`.

    Parameters
    ----------
    df : DataFrame of trips. Columns referenced here: calitp_itp_id,
        route_id, trip_id, time_of_day, service_hours, car_duration_hours,
        bus_multiplier, bus_difference, num_trips, num_competitive,
        pct_trips_competitive, p25, p50, p75, p50_trip, fastest_trip,
        `y_col` and f"{y_col}_spread".
    y_col : column plotted on the y-axis.
    Y_MIN, Y_MAX : lower and upper bounds of the y-axis domain.

    Returns
    -------
    The faceted, interactive Altair chart.

    Also reads the module-level names `operator_name` and `itp_id` for the
    chart title and subtitle.
    """
    # Constant column that positions the horizontal rule on the chart:
    # 2 for the bus_multiplier ratio, 0 for any other metric.
    cutoff_value = 2 if y_col == "bus_multiplier" else 0
    df = df.assign(cutoff=cutoff_value)

    # Facet order: use the same sorting done in the wrangling, then keep the
    # first row per route to get one entry per route_id.
    sort_columns = [
        "calitp_itp_id",
        "pct_trips_competitive",
        "num_competitive",
        "p50",
        f"{y_col}_spread",
    ]
    sort_ascending = [True, False, False, True, True]
    sorted_trips = df.sort_values(sort_columns, ascending=sort_ascending)
    route_sort_order = list(
        sorted_trips.drop_duplicates(subset=["route_id"]).route_id
    )

    title_config = {
        "text": f"{operator_name}: {labeling(y_col)}",
        "subtitle": f"ITP ID: {itp_id}",
    }

    # --- Encodings for the main point layer ---
    jitter_x = alt.X(
        'jitter:Q',
        title=None,
        axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
        scale=alt.Scale(),
    )
    metric_y = alt.Y(
        f'{y_col}:Q',
        title=labeling(y_col),
        scale=alt.Scale(domain=[Y_MIN, Y_MAX]),
    )
    # Grab colors where we can distinguish between groups
    # (palette entries at index 2 and 3 are skipped).
    time_of_day_palette = (
        cp.CALITP_CATEGORY_BOLD_COLORS[:2] + cp.CALITP_CATEGORY_BOLD_COLORS[4:]
    )
    time_of_day_color = alt.Color(
        'time_of_day:N',
        title="Time of Day",
        sort=["AM Peak", "Midday", "PM Peak", "Owl Service"],
        scale=alt.Scale(range=time_of_day_palette),
    )
    trip_tooltip = alt.Tooltip([
        "route_id", "trip_id",
        "service_hours", "car_duration_hours",
        "bus_multiplier", "bus_difference",
        "num_trips", "num_competitive",
        "pct_trips_competitive",
        "p25", "p50", "p75",
    ])

    stripplot = (
        alt.Chart()
        .mark_point(size=10, opacity=0.5, strokeWidth=1.5)
        .encode(
            x=jitter_x,
            y=metric_y,
            color=time_of_day_color,
            tooltip=trip_tooltip,
        )
        .transform_calculate(
            # Generate Gaussian jitter with a Box-Muller transform
            jitter='sqrt(10*-2*log(random()))*cos(2*PI*random())'
        )
    )

    # Dark-gray marker on the rows flagged as the p50 trip.
    median_marker = specific_point(y_col).transform_filter(
        alt.datum.p50_trip == 1
    )

    # Dashed horizontal rule drawn at the constant `cutoff` column.
    cutoff_rule = (
        alt.Chart()
        .mark_rule(strokeDash=[2, 3])
        .encode(
            y=alt.Y("cutoff:Q", title=None),
            color=alt.value(DARK_GRAY),
        )
    )

    # Add labels: percent of competitive trips, drawn at a fixed y position
    # on the rows flagged as the fastest trip.
    # https://github.com/altair-viz/altair/issues/920
    pct_label = (
        stripplot
        .mark_text(align='center')
        .encode(
            y=alt.value(100),
            text=alt.Text('pct_trips_competitive:Q', format='.0%'),
            color=alt.value("black"),
        )
        .transform_filter(alt.datum.fastest_trip == 1)
    )

    # Must define data with top-level configuration to be able to facet,
    # so the layers are data-less and `df` is supplied in .facet().
    layers = stripplot.properties(width=60) + median_marker + cutoff_rule + pct_label
    route_columns = alt.Column(
        "route_id:N", title="Route ID", sort=route_sort_order
    )
    faceted = layers.facet(column=route_columns, data=df)

    chart = (
        faceted.interactive()
        .configure_facet(spacing=0)
        .configure_view(stroke=None)
        .properties(title=title_config)
    )

    return chart

In [None]:
# Keep only rows whose pct_trips_competitive exceeds 0.5.
plot_me = df.loc[df.pct_trips_competitive > 0.5]

# Axis bounds come from all of plot_me, not just the plot group drawn below.
y_col, y_col2 = "bus_multiplier", "bus_difference"
Y_MIN, Y_MAX = plot_me[y_col].min(), plot_me[y_col].max()
Y_MIN2, Y_MAX2 = plot_me[y_col2].min(), plot_me[y_col2].max()

# Both charts draw plot_group 0 only: one per metric.
chart1 = make_stripplot(
    plot_me.loc[plot_me.plot_group == 0],
    y_col=y_col,
    Y_MIN=Y_MIN,
    Y_MAX=Y_MAX,
)
chart2 = make_stripplot(
    plot_me.loc[plot_me.plot_group == 0],
    y_col=y_col2,
    Y_MIN=Y_MIN2,
    Y_MAX=Y_MAX2,
)

In [None]:
# Display the bus_multiplier stripplot (plot_group 0).
chart1

In [None]:
# Display the bus_difference stripplot (plot_group 0).
chart2

In [None]:
# Disabled code kept as a bare string literal: it would draw one
# bus_multiplier stripplot per plot_group instead of only group 0, with the
# y-axis starting at 2.
# NOTE(review): as the last expression of a notebook cell, this string is
# presumably echoed as cell output -- consider converting it to # comments
# or deleting it.
'''
plot_me = df[df.pct_trips_competitive > 0.5]

y_col = "bus_multiplier"
Y_MAX = plot_me[y_col].max()

groups = list(sorted(plot_me.plot_group.unique()))
print(groups)

for i in groups:
    print(i)
    chart = make_stripplot(plot_me[plot_me.plot_group==i], y_col, 2, Y_MAX)
    display(chart)
'''