In [None]:
%%capture

import warnings
warnings.filterwarnings("ignore")

import altair as alt
import geopandas as gpd
import pandas as pd

import calitp_data_analysis.magics
from great_tables import GT

import viz_stop_metrics
import chart_utils

alt.data_transformers.enable("vegafusion")

In [None]:
# Keep the assignment below commented out: this is the `parameters`-tagged cell, so `name` is injected when the notebook is parameterized.
#name = "Torrance Schedule"

In [None]:
%%capture_parameters
name

In [None]:
# Daily stop metrics, restricted to the operator selected via `name`.
gdf1 = viz_stop_metrics.import_stop_df(
    filters=[[("name", "==", name)]],
    is_daily=True,
)

# {name} 
## Time-Series
### Availability and Reliability of Acceptable StopTimeUpdate Messages

* 2+ trip updates with stop arrival information in each minute interval
* higher accuracy: a prediction is accurate if it falls within the bounds `-60ln(Time to Prediction+1.3) < Prediction Error < 60ln(Time to Prediction+1.5)`; the further out before arrival, the more generous the buffer.
* Time period: 30 minutes before the arrival at stop

In [None]:
# Grouping columns and the two percentage metrics handed to the 2D histogram.
stop_grouping_cols = ["name", "weekday_weekend", "stop_id"]
plot_cols = ["pct_accurate_minutes", "pct_complete_minutes"]

d = "Weekday"
chart = chart_utils.make_2d_histogram(
    # Rows for this day type only, narrowed to the columns listed above.
    gdf1.loc[gdf1.weekday_weekend == d, stop_grouping_cols + plot_cols],
    title=f"% complete and accurate predictions for {d} trips by stop",
)

chart

In [None]:
d = "Saturday"
chart = chart_utils.make_2d_histogram(
    # Rows for this day type only, narrowed to the grouping + metric columns.
    gdf1.loc[gdf1.weekday_weekend == d, stop_grouping_cols + plot_cols],
    title=f"% complete and accurate predictions for {d} trips by stop",
)

chart

In [None]:
d = "Sunday"
chart = chart_utils.make_2d_histogram(
    # Rows for this day type only, narrowed to the grouping + metric columns.
    gdf1.loc[gdf1.weekday_weekend == d, stop_grouping_cols + plot_cols],
    title=f"% complete and accurate predictions for {d} trips by stop",
)

chart

In [None]:
# Metric columns summarised in the descriptive tables below.
# `format_table` formats and relabels exactly these five columns, so keep
# the two in sync when adding or removing a metric.
metric_cols = [
    "avg_prediction_spread_minutes",
    "avg_prediction_error_minutes",
    "pct_accurate_minutes",
    "pct_complete_minutes",
    "avg_predictions_per_trip",
]

In [None]:
def format_table(df, title):
    """
    Render summary statistics of the stop metrics as a great_tables table.

    The table is built from `df.describe()`, so each row is one statistic
    (count, mean, std, min, quartiles, max) and each column is one metric.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the five metric columns referenced below
        (`pct_accurate_minutes`, `pct_complete_minutes`,
        `avg_prediction_spread_minutes`, `avg_prediction_error_minutes`,
        `avg_predictions_per_trip`).
        NOTE(review): the `pct_*` columns are assumed to be stored as
        proportions (0-1), since `fmt_percent` scales by 100 -- confirm.
    title : str
        Text shown in the table header.

    Returns
    -------
    great_tables.GT
        The formatted table object.
    """
    pct_cols = ["pct_accurate_minutes", "pct_complete_minutes"]
    number_cols = [
        "avg_prediction_spread_minutes",
        "avg_prediction_error_minutes",
        "avg_predictions_per_trip",
    ]

    stats = df.describe().reset_index()

    # After reset_index() the first column holds the statistic names.
    # The "count" row is a tally of observations, not a metric value, so it
    # must not go through the percent / 2-decimal formatters (a count of 150
    # would otherwise render as "15,000.0%" in the pct_* columns).
    stat_names = stats.iloc[:, 0].tolist()
    count_rows = [i for i, stat in enumerate(stat_names) if stat == "count"]
    value_rows = [i for i, stat in enumerate(stat_names) if stat != "count"]

    table = (
        GT(stats)
        .fmt_percent(columns=pct_cols, rows=value_rows, decimals=1)
        .fmt_number(columns=number_cols, rows=value_rows, decimals=2)
        .fmt_integer(columns=pct_cols + number_cols, rows=count_rows)
        .cols_label(
            avg_prediction_error_minutes = "Prediction Error (minutes)",
            avg_prediction_spread_minutes = "Prediction Spread / Wobble (minutes)",
            pct_accurate_minutes = "% Minutes with Accurate Prediction",
            pct_complete_minutes = "% Minutes with Trip Updates",
            avg_predictions_per_trip = "# Predictions in 30 Minutes Before Arrival",
        )
        .tab_options(table_font_size="12px")
        .tab_header(title = title)
    )

    return table

In [None]:
d = "Weekday"
# Summary table of the metric columns for this day type.
format_table(
    gdf1.loc[gdf1.weekday_weekend == d, metric_cols],
    f"{d} Descriptives",
)

In [None]:
d = "Saturday"
# Summary table of the metric columns for this day type.
format_table(
    gdf1.loc[gdf1.weekday_weekend == d, metric_cols],
    f"{d} Descriptives",
)

In [None]:
d = "Sunday"
# Summary table of the metric columns for this day type.
format_table(
    gdf1.loc[gdf1.weekday_weekend == d, metric_cols],
    f"{d} Descriptives",
)

### Prediction Accuracy 

* Find how "accurate" the prediction is, based on whether it falls within the bounds `-60ln(Time to Prediction+1.3) < Prediction Error < 60ln(Time to Prediction+1.5)`; the further out before arrival, the more generous the buffer.
* In contrast to the `True/False` of whether a prediction is deemed accurate or not, this metric finds the "error" in minutes.
* Positive values = arrival came **after** the prediction. 
* Time period: 30 minutes before the arrival at stop

In [None]:
plot_col = "avg_prediction_error_minutes"

# One boxplot per day type for the selected metric; the third argument is
# the display label passed to the chart helper.
boxplot = chart_utils.make_boxplot_by_day_type(
    gdf1,
    plot_col,
    "Avg Prediction Error (minutes)",
)

boxplot

### Availability of StopTimeUpdate Messages  

* How many predictions are we getting for each stop (per trip, to normalize) in the 30 minute period before arrival?
* This complements the `True/False` check of whether there were at least 2 predictions per minute by reporting the actual count (e.g., 3 predictions per minute for 30 minutes yields 90 predictions).
* Time period: 30 minutes before the arrival at stop

In [None]:
plot_col = "avg_predictions_per_trip"
# in the 30 minute period before arrival

# Plain string literal: the label has no placeholders, so no f-prefix is needed.
boxplot = chart_utils.make_boxplot_by_day_type(
    gdf1, plot_col, "# Predictions 30 min before Arrival")

boxplot

## Aggregated Stops by Day Type 

Maps currently show the metrics for **weekday** service only.

In [None]:
# Optional: uncomment the `del` to release the daily frame before loading
# the non-daily one.
#del gdf1
# Same operator filter as the daily load, but with is_daily=False.
gdf2 = viz_stop_metrics.import_stop_df(
    is_daily = False, filters = [[("name", "==", name)]]
)

### Prediction Inconsistency (Wobble)
* If the prediction is changing from minute to minute, a large spread would show up.
* If the prediction is fairly consistent, we would see small spread.
* It is possible for the predicted stop arrival to be **consistent, yet inaccurate** or **inconsistent, yet fairly accurate as it approaches the stop**.
* These touch on different transit user experiences.
  * Inconsistent predictions are difficult for trip planning in real-time, though keeping customers up-to-date with reliable stop arrivals can alleviate the experience.
  * Consistently inaccurate predictions are difficult to plan around and can result in low trust.

In [None]:
stop_grouping_cols = ["name", "weekday_weekend", "stop_id", "stop_name"]
plot_col = "avg_prediction_error_minutes"

# NOTE(review): this cell sits under the "Prediction Inconsistency (Wobble)"
# heading but plots prediction error; confirm whether
# avg_prediction_spread_minutes was the intended metric here.
(
    chart_utils.make_layered_histogram(
        gdf2[[*stop_grouping_cols, plot_col]],
        plot_col,
        step_size=0.25,  # 15 sec
    )
    .properties(title="Avg Prediction Error (minutes)")
)

In [None]:
plot_col = "pct_accurate_minutes"

# Histogram step of 0.05 on the metric's own scale
# (presumably a 0-1 proportion, i.e. 5-point bins -- confirm).
(
    chart_utils.make_layered_histogram(
        gdf2[[*stop_grouping_cols, plot_col]],
        plot_col,
        step_size=0.05,
    )
    .properties(title="% Accurate Predictions")
)

### Accurate Predictions

In [None]:
plot_col = "pct_accurate_minutes"

# Map only the weekday rows of the non-daily frame.
m = chart_utils.stop_map_of_metric(
    gdf2.loc[gdf2.weekday_weekend == "Weekday"],
    plot_col,
)
m