# Variability in Trip Times for Competitive Routes

For the competitive trips, compute a few summary statistics to support the narrative:
* x% of trips are at or below the 2x cut-off (bus multiplier ≤ 2)
* Operator has this many total routes (n), this many parallel (n, %), and this many competitive parallel (n, %)

In [1]:
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import pandas as pd

from calitp.tables import tbl
from IPython.display import display, Markdown, HTML
from siuba import *

from shared_utils import calitp_color_palette as cp
from shared_utils import styleguide

# Registering only adds the theme to Altair's registry; it has no effect on
# charts until the theme is enabled as well.
alt.themes.register("calitp_theme", styleguide.calitp_theme)
# Trailing semicolon keeps the return value's repr out of the cell output.
alt.themes.enable("calitp_theme");

E0420 17:24:57.349518684    1509 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies
E0420 17:24:59.700074671    1509 fork_posix.cc:70]           Fork support is only compatible with the epoll1 and poll polling strategies


<function shared_utils.styleguide.calitp_theme(font='Raleway', labelFont='Nunito Sans', font_size=18, chart_width=400, chart_height=250, markColor='#8CBCCB', axisColor='#cbcbcb', guideLabelColor='#474747', guideTitleColor='#333', blackTitle='#333', backgroundColor='white', PALETTE={'category_bright': ['#2EA8CE', '#EB9F3C', '#F4D837', '#51BF9D', '#8CBCCB', '#9487C0'], 'category_bold': ['#136C97', '#E16B26', '#F6BF16', '#00896B', '#7790A3', '#5B559C'], 'diverging': ['#E16B26', '#EB9F3C', '#f6e7e1', '#8CBCCB', '#2EA8CE', '#136C97'], 'sequential': ['#B9D6DF', '#8CBCCB', '#2EA8CE', '#136C97', '#0B405B']})>

In [2]:
# parameters cell
# ITP ID of the operator this notebook reports on. Later cells filter the
# trips table to this ID and print it in the page heading and chart subtitle.
itp_id = 182

In [8]:
# Load the trip-level table, keeping only this operator's rows that have a
# plot group assigned.
df = (
    pd.read_parquet("./data/stripplot_trips.parquet")
    .loc[lambda x: (x.calitp_itp_id == itp_id) & (x.plot_group.notna())]
    .reset_index(drop=True)
)

# For ITP ID 182 only: strip the '-13153' text from route_id and cast the
# remainder to int.
if itp_id == 182:
    df = df.assign(
        route_id=lambda x: x.route_id.str.replace('-13153', '').astype(int)
    )

# The first remaining row supplies the operator-level labels.
operator_name = df["name"].iloc[0]
district = df["caltrans_district"].iloc[0]

In [14]:
# Page header: the fixed report title, then the operator-specific heading.
display(
    Markdown("# Competitive Route Travel Time Variability"),
    HTML(f"<h1>{operator_name} (ITP ID: {itp_id})</h1>"),
)

# Competitive Route Travel Time Variability

In [15]:
def labeling(word):
    """
    Turn a snake_case name into a display label.

    Underscores become spaces and the result is title-cased,
    e.g. "bus_multiplier" -> "Bus Multiplier".
    """
    return " ".join(word.split("_")).title()

In [16]:
def specific_point(y_col, color):
    """
    Build a data-less point layer for highlighting selected rows.

    Parameters
    ----------
    y_col : str
        Column encoded on the y-axis as a quantitative field.
    color : str
        Fixed color applied to every point in the layer.

    Returns
    -------
    An Altair point chart with no data and no filter attached; the caller
    adds a filter and supplies data when layering/faceting.
    """
    encodings = {
        "y": alt.Y(f"{y_col}:Q"),
        "color": alt.value(color),
    }
    return alt.Chart().mark_point(size=20, opacity=0.8).encode(**encodings)

In [17]:
#https://altair-viz.github.io/gallery/stripplot.html
def make_stripplot(df, y_col, cutoff=2):
    """
    Draw a jittered strip plot of trips, faceted into one column per route.

    Layers, in drawing order:
      1. every row as a point, colored by time of day;
      2. gray points for rows where competitive_trip == 1;
      3. black points for rows whose service_hours equals that row's
         p25, p50 or p75 value;
      4. a dashed horizontal rule at `cutoff`.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to plot. The chart spec references the columns route_id,
        time_of_day, trip_id, service_hours, car_duration_hours,
        bus_multiplier, competitive_trip, competitive_route, p25, p50, p75
        and `y_col`.
    y_col : str
        Quantitative column shown on the y-axis of every point layer. Its
        title-cased name (via `labeling`) is used for the axis and chart title.
    cutoff : numeric, default 2
        y-value at which the dashed reference line is drawn.

    Returns
    -------
    The faceted, interactive Altair chart.

    Notes
    -----
    Reads the notebook-level `operator_name` and `itp_id` for the title and
    subtitle. The x position is random jitter computed inside the chart spec,
    not a column of `df`.
    """
    # Constant column so the reference line can be drawn inside each facet
    df = df.assign(cutoff=cutoff)
    y_title = labeling(y_col)
    chart_title = f"{operator_name}: {y_title}"
    subtitle = f"ITP ID: {itp_id}"

    tooltip_cols = ["route_id", "trip_id",
                    "service_hours", "car_duration_hours",
                    "bus_multiplier",
                    "competitive_trip", "competitive_route"]
    # Make sure the plotted value itself is always shown on hover
    if y_col not in tooltip_cols:
        tooltip_cols.append(y_col)

    stripplot = (
        alt.Chart()
        .mark_point(size=15, opacity=0.8)
        .encode(
            x=alt.X(
                'jitter:Q',
                title=None,
                axis=alt.Axis(values=[0], ticks=True, grid=False, labels=False),
                scale=alt.Scale(),
                stack='zero',
            ),
            # Plot the requested column so this layer agrees with the
            # highlight layers below, which also use y_col
            y=alt.Y(f'{y_col}:Q', title=y_title),
            color=alt.Color('time_of_day:N', title="Time of Day",
                            sort=["AM Peak", "Midday", "PM Peak", "Owl Service"],
                            scale=alt.Scale(range=cp.CALITP_CATEGORY_BRIGHT_COLORS)
                           ),
            tooltip=alt.Tooltip(tooltip_cols)
        )
        .transform_calculate(
            # Generate Gaussian jitter with a Box-Muller transform
            jitter='sqrt(10*-2*log(random()))*cos(2*PI*random())'
        )
    )

    # Rows flagged competitive_trip == 1 (the original comment here called
    # this the "fastest trip")
    trip = (specific_point(y_col, "gray")
            .transform_filter(alt.datum.competitive_trip==1)
           )

    # Rows whose service_hours matches the row's p25 / p50 / p75 value
    p25 = (specific_point(y_col, "black")
           .transform_filter(alt.datum.service_hours==alt.datum.p25)
          )

    p50 = (specific_point(y_col, "black")
           .transform_filter(alt.datum.service_hours==alt.datum.p50)
          )

    p75 = (specific_point(y_col, "black")
           .transform_filter(alt.datum.service_hours==alt.datum.p75)
          )

    horiz_line = (
        alt.Chart()
        .mark_rule(strokeDash=[2,3])
        .encode(
            y=alt.Y("cutoff:Q", title=None),
            color=alt.value("gray")
        )
    )

    # Must define data with top-level configuration to be able to facet
    chart = (
        (stripplot.properties(width=50) +
         trip + p25 + p50 + p75 +
         horiz_line)
        .facet(
            column = alt.Column("route_id:N", title="Route ID"),
            data=df
        ).interactive()
        .configure_facet(spacing=0)
        .configure_view(stroke=None)
        .properties(title={
            "text": chart_title,
            "subtitle": subtitle,
        })
    )

    return chart

In [18]:
# One faceted strip plot per plot group, in the order the groups first
# appear in the table.
groups = list(df["plot_group"].unique())
print(groups)
for group in groups:
    group_trips = df[df["plot_group"] == group]
    chart = make_stripplot(group_trips, "bus_multiplier")
    display(chart)

[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
