In [None]:
%%capture

import warnings
warnings.filterwarnings("ignore")

import altair as alt
import pandas as pd

import calitp_data_analysis.magics
from great_tables import GT

import chart_utils
import prep_operator_data
from rt_msa_utils import PREDICTIONS_GCS

# Route Altair chart data through the "vegafusion" data transformer for every
# chart rendered below (see Altair's large-dataset docs for what this changes).
alt.data_transformers.enable("vegafusion")

In [None]:
# Keep this commented out; this is the `parameters`-tagged cell
#name = "Torrance Schedule"

# {name} 

## GTFS Stop Time Updates {name} Summary

Generally, we want a better transit user experience. Specifically, the performance metrics we can derive from GTFS RT Trip Updates distill into the following objectives:
1. Increase prediction reliability and accuracy
2. Increase the availability and completeness of GTFS RT
3. Decrease the inconsistency and fluctuations of predictions 

In [None]:
# The parquet path is assembled from the GCS prefix, the table name and the
# date window that the file covers.
OPERATOR_FILE = "test_dp2_fct_daily_schedule_rt_operator_summary"
date_period = "2025-06-01_2025-06-15"

# `name` comes from the `parameters`-tagged cell; the filter is handed to the
# parquet reader so only rows whose schedule_name equals `name` are returned.
operator_filter = [[("schedule_name", "==", name)]]

df = pd.read_parquet(
    f"{PREDICTIONS_GCS}{OPERATOR_FILE}_{date_period}.parquet",
    filters=operator_filter,
)

# Table-ready metrics consumed by the great_tables summary further down.
trip_updates_metrics_for_table = prep_operator_data.prep_trip_updates_metrics(df)

In [None]:
# Build the input for the prediction-error charts from the operator summary.
# NOTE(review): judging by the helper's name this reshapes decile arrays to
# long format -- confirm against prep_operator_data.
operator_decile_df = prep_operator_data.explode_decile_array_to_long(df)

# Last expression of the cell, so the returned chart object is displayed.
chart_utils.fig5and6_prediction_error_plots(operator_decile_df)

In [None]:
# Reference for palette / color names (great_tables source):
# https://github.com/posit-dev/great-tables/blob/a59301b6cd6b4f42c035417f4836a7e16e842f2e/great_tables/_data_color/constants.py#L4

# Columns grouped by the formatter applied to them.
percent_columns = [
    "pct_tu_complete_minutes",
    "pct_tu_accurate_minutes",
    "bus_catch_likelihood",
    "pct_tu_trips",
]
one_decimal_columns = ["tu_messages_per_minute", "avg_prediction_spread_minutes"]

# Columns that stay in the dataframe but are hidden from the rendered table.
hidden_columns = [
    "schedule_name",
    "pct_predictions_early",
    "pct_predictions_ontime",
    *(f"prediction_error_sec_p{pct}" for pct in (10, 25, 50, 75, 90)),
]

# Display labels keyed by dataframe column name.
# NOTE(review): the pct_tu_complete_minutes label ends in singular "Message";
# left untouched here because it is user-facing text.
column_labels = {
    "n_predictions": "# Stop Time Update Messages",
    "pct_tu_complete_minutes": "% Minutes with 2+ Stop Time Update Message",
    "pct_tu_accurate_minutes": "% Minutes with Accurate Predictions",
    "avg_prediction_spread_minutes": "Prediction Spread / Wobble (minutes)",
    "tu_messages_per_minute": "Avg Stop Time Updates per Minute",
    "bus_catch_likelihood": "Bus Catch Likelihood (early + ontime predictions)",
    "prediction_padding_minutes": "Prediction Padding (minutes added to avoid missing bus)",
    "prediction_error_minutes_iqr": "Prediction Error IQR (75th - 25th percentile) (minutes)",
    "prediction_error_minutes_p50": "Prediction Error 50th percentile (minutes)",
    "pct_tu_trips": "% Scheduled Trips with Trip Updates",
}

# The parenthesized chain is the cell's last expression, so the table renders.
(
    GT(trip_updates_metrics_for_table)
    .fmt_percent(columns=percent_columns, decimals=1)
    .fmt_integer(columns=["n_predictions"])
    .fmt_number(columns=one_decimal_columns, decimals=1)
    .cols_label(**column_labels)
    .cols_hide(columns=hidden_columns)
    .tab_options(table_font_size="12px")
    .tab_header(title=name)
    # Currently disabled shading for the two pct_*_minutes columns:
    # .data_color(
    #     columns=['pct_tu_complete_minutes', 'pct_tu_accurate_minutes'],
    #     palette=["lightpink1", "mintcream"],
    #     domain=[0, 1]
    # )
    .data_color(
        columns=["bus_catch_likelihood", "pct_tu_trips"],
        palette="YlGn",
    )
    .data_color(
        columns=["tu_messages_per_minute"],
        palette=["lightpink1", "mintcream"],
        domain=[0, 3.1],
    )
    .data_color(
        columns=["prediction_padding_minutes", "prediction_error_minutes_iqr"],
        palette=["lightyellow1", "goldenrod3"],  # alternative noted by author: "YlOrRd"
    )
)