In [None]:
%%capture

import warnings
warnings.filterwarnings("ignore")

import altair as alt
import pandas as pd

import calitp_data_analysis.magics
from great_tables import GT

import prep_data
import chart_utils

# Switch Altair to the "vegafusion" data transformer for every chart in this notebook.
# NOTE(review): presumably enabled so charts can be built from frames larger than
# Altair's default row limit — confirm against the Altair docs.
alt.data_transformers.enable("vegafusion")

In [None]:
# Keep the line below commented out: this is the `parameters`-tagged cell, so `name` is supplied when the notebook is run
#name = "Torrance Schedule"

In [None]:
%%capture_parameters
name

# {name} 

One of the most common transit user behaviors is to consult an app (Google Maps, Apple Maps, NextBus, etc) to find out when the bus or train is going to arrive.

That widely desired piece of information is powered by GTFS Real-Time Trip Updates, specifically the [Stop Time Updates](https://gtfs.org/documentation/realtime/reference/#message-stoptimeupdate) specification. The underlying data produced here is huge. Imagine every instance a bus arrives at a stop in California. Multiply that by 30 for the 30 minutes before the bus arrives, and that's the dataset we're working to distill into usable performance metrics for all transit operators.

Generally, we want a better transit user experience. Specifically, the performance metrics we can derive from GTFS RT Trip Updates distill into the following objectives:
1. Increase prediction reliability and accuracy
2. Increase the availability and completeness of GTFS RT
3. Decrease the inconsistency and fluctuations of predictions 

In [None]:
# One filter, shared by both imports: keep only rows whose `schedule_name` matches `name`.
operator_filter = [[("schedule_name", "==", name)]]

# Stop-level table with `is_daily=True` (grouped by `service_date` further down).
daily_df = prep_data.import_stop_df(is_daily=True, filters=operator_filter)

# Stop-level table with `is_daily=False` (carries `day_type`, used further down).
daytype_df = prep_data.import_stop_df(is_daily=False, filters=operator_filter)

In [None]:
# Pre-aggregate into binned counts, replicating what altair's count() does for a
# histogram. Doing it here avoids the autosave timeout that seems to happen after
# about 4 seconds.
percentile_columns = [
    "pct_tu_predictions_early", "pct_tu_predictions_ontime", "pct_tu_predictions_late",
    "pct_tu_accurate_minutes", "pct_tu_complete_minutes",
]

# Daily table: bin first, then summarize the counts per service date and feed.
daily_binned = prep_data.daily_binned_counts_by_deciles(daily_df, percentile_columns)
daily_df2 = prep_data.summary_counts_by_bins(
    daily_binned,
    group_cols=["service_date", "base64_url"],
    percentile_columns=percentile_columns,
).astype({"service_date": "str"})  # date as string so altair can use it in the legend

# Day-type table: same two steps, summarized per month / day type / feed pair.
daytype_binned = prep_data.daily_binned_counts_by_deciles(daytype_df, percentile_columns)
daytype_df2 = prep_data.summary_counts_by_bins(
    daytype_binned,
    group_cols=[
        "year", "month", "month_first_day",
        "day_type", "schedule_base64_url", "tu_base64_url",
    ],
    percentile_columns=percentile_columns,
)

## Reliable Prediction Accuracy

We can measure progress on this large objective with several metrics:

* Share of predictions that are early / on-time / late
* Prediction error (difference between `predicted_arrival` and `actual_arrival`)
* Share of predictions that are accurate (each prediction error is categorized as accurate or not, depending on an exponential equation)
   * The prediction is accurate if it falls within the bounds of `-60ln(Time to Prediction+1.3) < Prediction Error < 60ln(Time to Prediction+1.5)`; the further out before arrival, the more generous the buffer. 
* Time period: 30 minutes before the arrival at stop 

### Percent Predictions Early / On-Time / Late 

* columns used: `pct_tu_predictions_early`, `pct_tu_predictions_ontime`, `pct_tu_predictions_late`
* For transit users, **higher proportions of `% predictions early / on-time` mean you are more likely to catch the bus** if you follow the prediction exactly.
* <span style="color:#4477aa">**Goal:** increase the share of early / on-time predictions and lower the share of late predictions.</span> We would rather have transit users follow the predictions and wait for the bus.

In [None]:
# One panel per prediction timing category, colored by service date.
timing_categories = ["early", "ontime", "late"]

chart_list = [
    chart_utils.histogram_line_chart_by_date(
        daily_df2,
        legend_color_column="service_date",
        metric_column=f"pct_tu_predictions_{timing}",
    )
    for timing in timing_categories
]

# Panels sit side by side and share one y-scale so they can be compared directly.
combined_percent_daily_chart = alt.hconcat(*chart_list).resolve_scale(y="shared")
combined_percent_daily_chart

In [None]:
# One panel per prediction timing category, colored by day type.
timing_categories = ["early", "ontime", "late"]

chart_list = [
    chart_utils.histogram_line_chart_by_date(
        daytype_df2,
        legend_color_column="day_type",
        metric_column=f"pct_tu_predictions_{timing}",
    )
    for timing in timing_categories
]

# Panels sit side by side and share one y-scale so they can be compared directly.
combined_percent_daytype_chart = alt.hconcat(*chart_list).resolve_scale(y="shared")
combined_percent_daytype_chart

### High Share of Late Predictions for Weekday Stops Map

If weekday stops can't be plotted, weekend stops will be used. 

In [None]:
# Map only the stops where more than 25% of predictions are late
# (late predictions are the type of error we want to avoid).
has_high_late_share = daytype_df.pct_tu_predictions_late > 0.25

m = chart_utils.make_map(daytype_df[has_high_late_share], "pct_tu_predictions_late")
m

### Average Prediction Error 
* column used `avg_prediction_error_minutes`
* For transit users, **negative values for `avg_prediction_error` mean you miss the bus** if you follow the prediction exactly.
* <span style="color:#4477aa">**Goal 1:** minimize occurrences of negative prediction errors.</span> We would rather have transit users follow the predictions and wait for the bus.
* <span style="color:#4477aa">**Goal 2:** tighten the range of prediction errors and have the range move closer to zero for shorter expected wait times.</span> 
   * Large positive prediction error values mean users are expected to wait longer by following the prediction. 
   * By tightening the box and centering it on zero, users experience both more reliable predictions and shorter wait times. 

In [None]:
# Boxplot of `avg_prediction_error_minutes` built from the daily stop table.
prediction_error_boxplot = chart_utils.boxplot_by_date(
    daily_df, "avg_prediction_error_minutes"
)
prediction_error_boxplot

### % of minutes with accurate predictions
* column used: `pct_tu_accurate_minutes`
* For transit users, **a higher proportion of `% accurate minutes` means you are getting reliable predictions for longer stretches of time before the bus arrives**.
* <span style="color:#4477aa">**Goal:** increase the share of accurate minutes.</span>
* Note: this metric does depend on the exponential curve, which means that anything outside this fairly assertive curve means low performance on this metric.

In [None]:
# Share of accurate minutes from the binned daily table, one line color per service date.
chart_utils.histogram_line_chart_by_date(
    daily_df2, legend_color_column="service_date", metric_column="pct_tu_accurate_minutes"
)

In [None]:
# Share of accurate minutes from the binned day-type table, one line color per day type.
chart_utils.histogram_line_chart_by_date(
    daytype_df2, legend_color_column="day_type", metric_column="pct_tu_accurate_minutes"
)

## Availability of Acceptable StopTimeUpdate Messages  
### % of minutes with available predictions 
* column used: `pct_tu_complete_minutes`
* This metric is the easiest to achieve. For starters, having information is better than no information.
* For transit users, **a higher proportion of `% complete minutes` means you are getting predictions for longer stretches of time before the bus arrives**.
* <span style="color:#4477aa">**Goal:** increase the share of complete minutes.</span>
* Note: Newmark paper shows that among four CA operators, this metric is fairly easy to reach and operators can even reach up to 90% completeness.

In [None]:
# Share of complete minutes from the binned daily table, one line color per service date.
chart_utils.histogram_line_chart_by_date(
    daily_df2, legend_color_column="service_date", metric_column="pct_tu_complete_minutes"
)

In [None]:
# Share of complete minutes from the binned day-type table, one line color per day type.
chart_utils.histogram_line_chart_by_date(
    daytype_df2, legend_color_column="day_type", metric_column="pct_tu_complete_minutes"
)

## Simpler Expected Wait Time 
* column used: `avg_prediction_error_minutes`, `prediction_error_label`
* For transit users, **shorter wait times (lower positive values) without missing the bus (few negative values)** result in more pleasant transit journeys.
* <span style="color:#4477aa">**Goal:** decrease expected wait time and decrease late predictions.</span>
* This metric attempts a more generous approach towards determining accuracy, by categorizing `avg_prediction_error_minutes` with the 1-3 min, 3-5 min, and 5+ min thresholds.
   * ontime is between 1 min early to 1 min late

In [None]:
# Bar chart of `prediction_error_label` from the daily table, non-stacked variant.
chart_utils.bar_chart_by_date(daily_df, "prediction_error_label", is_stacked=False)

In [None]:
# Bar chart of `prediction_error_label` from the daily table, stacked variant.
chart_utils.bar_chart_by_date(daily_df, "prediction_error_label", is_stacked=True)

### Prediction Error for Weekday Stops Map

In [None]:
# Map of the day-type stop table, using `prediction_error_label` as the plotted column.
prediction_error_map = chart_utils.make_map(daytype_df, "prediction_error_label")
prediction_error_map

## Prediction Inconsistency 

* column used: `avg_prediction_spread_minutes`
* This metric wants to **distinguish between consistent but inaccurate and inconsistent but accurate** prediction patterns.
   * Consistent but inaccurate predictions contribute to low user trust in the information generally.
   * Inconsistent but accurate predictions fluctuate, making it difficult for trip planning ahead of time. However, the up-to-date information can alleviate the discomfort. [Research](https://www.sciencedirect.com/science/article/abs/pii/S0965856416303494) has shown that waiting time is negatively perceived, but having real-time information communicated reduces that perceived waiting time.  
* For transit users, **less inconsistency (lower positive values)** means fewer fluctuations in predictions.
* <span style="color:#4477aa">**Goal:** Tighten the box with values closer to zero.</span>

In [None]:
# Boxplot of `avg_prediction_spread_minutes` built from the daily stop table.
prediction_spread_boxplot = chart_utils.boxplot_by_date(
    daily_df, "avg_prediction_spread_minutes"
)
prediction_spread_boxplot

## Descriptives Table with Detailed Percentiles

Take a look at percentiles for the metrics we have, so we can be comfortable moving towards a day type aggregation (weekday/Sat/Sunday) in the future.

In [None]:
# Metrics summarized in the descriptives tables below.
# The list order is the column order selected from `daytype_df` for those tables.
metric_cols = [
    "avg_prediction_error_minutes", "pct_tu_accurate_minutes",
    "pct_tu_predictions_early", "pct_tu_predictions_ontime", "pct_tu_predictions_late",
    "n_predictions", "avg_prediction_spread_minutes", "pct_tu_complete_minutes",
]

In [None]:
def format_table(
    df: pd.DataFrame, title: str
) -> GT:
    """
    Turn a descriptives dataframe into a formatted great_tables table.

    Args:
        df: frame holding every metric column referenced below
            (the five `pct_tu_*` shares, the two `avg_prediction_*_minutes`
            columns and `n_predictions`).
        title: text placed in the table header.

    Returns:
        GT object with share columns formatted as percents (1 decimal),
        minute columns as numbers (2 decimals), `n_predictions` as a number
        with 0 decimals, readable column labels and a 12px font.
    """
    share_cols = [
        "pct_tu_accurate_minutes", "pct_tu_complete_minutes",
        "pct_tu_predictions_early", "pct_tu_predictions_ontime",
        "pct_tu_predictions_late",
    ]
    minute_cols = [
        "avg_prediction_spread_minutes",
        "avg_prediction_error_minutes",
    ]
    count_cols = ["n_predictions"]

    # Display label for each raw column name
    display_labels = {
        "avg_prediction_error_minutes": "Prediction Error (minutes)",
        "avg_prediction_spread_minutes": "Prediction Spread / Wobble (minutes)",
        "pct_tu_accurate_minutes": "% Minutes with Accurate Prediction",
        "pct_tu_complete_minutes": "% Minutes with Trip Updates",
        "pct_tu_predictions_early": "% Early Predictions",
        "pct_tu_predictions_ontime": "% OnTime Predictions",
        "pct_tu_predictions_late": "% Late Predictions",
        "n_predictions": "Total Predictions",
    }

    # Same steps, same order as a single method chain; each call returns the updated table.
    table = GT(df)
    table = table.fmt_percent(columns=share_cols, decimals=1)
    table = table.fmt_number(columns=minute_cols, decimals=2)
    table = table.fmt_number(columns=count_cols, decimals=0)
    table = table.cols_label(**display_labels)
    table = table.tab_options(table_font_size="12px")
    table = table.tab_header(title=title)

    return table

In [None]:
# get all the percentiles, except counts, because we need to format separately
format_table(
    daytype_df[
        daytype_df.day_type=="Weekday"
    ][metric_cols].describe(
        prep_data.PERCENTILE_LIST
    ).drop(index="count").reset_index(), 
    "Weekday Descriptives"
)

In [None]:
# format the counts row by making it all numbers
format_table(
    daytype_df[
        daytype_df.day_type=="Weekday"
    ][metric_cols].describe().head(1), 
    ""
).fmt_number(decimals=0)

## Priority Stops

These are the weekday stops identified with either:
* over 25% of predictions late
* average prediction error of more than 3 minutes early *or* late

It will be possible for operators to not have any stops that meet this threshold.

In [None]:
# Columns carried into the priority-stops map.
keep_cols = [
    "month", "year", "day_type", 
    "stop_id", "stop_name",
    "pct_tu_predictions_early", "pct_tu_predictions_ontime", "pct_tu_predictions_late",
    "avg_prediction_error_minutes", "avg_prediction_spread_minutes",
    "prediction_error_label", "n_predictions",
    "geometry"
]

# The two priority criteria, each as its own boolean mask.
is_weekday = daytype_df.day_type == "Weekday"
over_quarter_late = daytype_df.pct_tu_predictions_late > 0.25
error_over_3_minutes = daytype_df.avg_prediction_error_minutes.abs() > 3  # early or late

# A weekday stop is a priority when it meets EITHER criterion, so the two
# thresholds are combined with `|`. Using `&` would keep only stops that meet
# both, which under-reports priority stops.
priority_df = daytype_df[
    is_weekday & (over_quarter_late | error_over_3_minutes)
][keep_cols]

# Operators may legitimately have no priority stops; show a message instead of an empty map.
if not priority_df.empty:
    m = chart_utils.plot_basic_map(
        priority_df, 
        plot_col = "prediction_error_label", 
        colorscale = chart_utils.FULL_CATEGORICAL_COLORS
    )
else:
    m = "No stops fit this criteria, and that's a good thing!"

display(m)