# pct_accuracy vs n_predictions_early/ontime/late

1. Make sure that `early/on_time/late` metrics make sense with `pct_accuracy`
1. Take a deeper dive into the "accuracy" exponential curve and see whether we can use it as an "expected wait time" metric.

In [1]:
import altair as alt
import numpy as np
import pandas as pd

import prep_data
from rt_msa_utils import PREDICTIONS_GCS, RT_MSA_DICT

# Use Altair's "vegafusion" data transformer.
alt.data_transformers.enable("vegafusion")

# Shorthand for the trip-updates download entries in RT_MSA_DICT.
DOWNLOAD_DICT = RT_MSA_DICT.rt_trip_updates_downloads

In [2]:
# Daily stop-grain trip-updates entry; interpolated into the parquet path
# (f"{PREDICTIONS_GCS}{FILE}.parquet") when the data is loaded below.
FILE = RT_MSA_DICT.rt_trip_updates_downloads.daily_stop_grain

def group_early_ontime(df: pd.DataFrame):
    """
    Add `pct_tu_predictions_early_ontime`, the row-wise sum of
    `pct_tu_predictions_early` and `pct_tu_predictions_ontime`.

    Newmark groups early and on-time predictions together as the
    "not miss bus" group. A new DataFrame is returned; `df` is left as is.
    """
    early_ontime_cols = ["pct_tu_predictions_early", "pct_tu_predictions_ontime"]
    combined_pct = df.loc[:, early_ontime_cols].sum(axis="columns")

    return df.assign(pct_tu_predictions_early_ontime=combined_pct)


# Load the daily stop-grain parquet and run it through the prep pipeline:
#   1. prep_data.calculate_percents - presumably derives the pct_* columns;
#      confirm in prep_data
#   2. prep_data.drop_outliers - called with MIN_ERROR_SEC / MAX_ERROR_SEC;
#      presumably drops rows whose prediction error is outside that range
#   3. group_early_ontime - adds pct_tu_predictions_early_ontime
daily_df = pd.read_parquet(
    f"{PREDICTIONS_GCS}{FILE}.parquet"
).pipe(
    prep_data.calculate_percents
).pipe(
    prep_data.drop_outliers,
    prep_data.MIN_ERROR_SEC, 
    prep_data.MAX_ERROR_SEC
).pipe(group_early_ontime)

# Check the column types after the pipeline.
daily_df.dtypes

key                                        object
stop_key                                   object
base64_url                                 object
service_date                       datetime64[ns]
stop_id                                    object
schedule_feed_key                          object
avg_prediction_error_sec                  float64
n_tu_accurate_minutes                       Int64
n_tu_complete_minutes                       Int64
n_tu_minutes_available                      Int64
avg_prediction_spread_minutes             float64
n_predictions                               Int64
n_predictions_early                         Int64
n_predictions_ontime                        Int64
n_predictions_late                          Int64
n_tu_trips                                  Int64
pct_tu_predictions_early                  Float64
pct_tu_predictions_ontime                 Float64
pct_tu_predictions_late                   Float64
pct_tu_accurate_minutes                   Float64


In [3]:
def accuracy_bounds(minutes_to_prediction: float) -> tuple[float, float]:
    """
    Allowed window between predicted and actual arrival for a given lead time.

    Based on the minutes until arrival, compute how far actual_arrival and
    predicted_arrival may differ. The window is computed in seconds and
    widens logarithmically with lead time:

        lower = -60 * ln(minutes_to_prediction + 1.3)
        upper =  60 * ln(minutes_to_prediction + 1.5)

    NOTE(review): the 1.3 / 1.5 offsets presumably come from the accuracy
    definition this notebook is checking against - confirm the source.

    Args:
        minutes_to_prediction: minutes until arrival; expected to be >= 0.

    Returns:
        (lower_bound, upper_bound) converted to MINUTES and rounded to two
        decimals. The lower bound is negative, the upper bound positive.
    """
    # Bounds are defined in seconds...
    lower_bound_sec = -60 * np.log(minutes_to_prediction + 1.3)
    upper_bound_sec = 60 * np.log(minutes_to_prediction + 1.5)

    # ...and reported in minutes so they line up with the lead-time input.
    lower_bound_min = lower_bound_sec / 60
    upper_bound_min = upper_bound_sec / 60

    return round(lower_bound_min, 2), round(upper_bound_min, 2)


In [4]:
# Show the accuracy window at a spread of lead times: every minute from 0 to 5,
# then every 5 minutes from 10 to 30.
for m in [*range(6), *range(10, 31, 5)]:
    print(f"Minutes to Arrival: {m}", accuracy_bounds(m), sep="\n")

Minutes to Arrival: 0
(-0.26, 0.41)
Minutes to Arrival: 1
(-0.83, 0.92)
Minutes to Arrival: 2
(-1.19, 1.25)
Minutes to Arrival: 3
(-1.46, 1.5)
Minutes to Arrival: 4
(-1.67, 1.7)
Minutes to Arrival: 5
(-1.84, 1.87)
Minutes to Arrival: 10
(-2.42, 2.44)
Minutes to Arrival: 15
(-2.79, 2.8)
Minutes to Arrival: 20
(-3.06, 3.07)
Minutes to Arrival: 25
(-3.27, 3.28)
Minutes to Arrival: 30
(-3.44, 3.45)


In [5]:
def categorize_prediction_error(prediction_error: float) -> str:
    """
    Bucket a prediction error (in seconds) into a named category.

    Early (positive values) means the prediction is earlier than the actual
    arrival: the bus comes after the prediction, so you will catch the bus.
    Negative values fall in the "late" categories.

    Buckets, in seconds. A value exactly on a shared boundary belongs to the
    bucket on the earlier (more positive) side:

        (600, inf)      very_early
        [300, 600]      early
        [60, 300)       little_early
        [-60, 60)       ontime
        [-300, -60)     little_late
        [-600, -300)    late
        (-inf, -600)    very_late

    Args:
        prediction_error: prediction error in seconds.

    Returns:
        The category name, or "unknown" when the value compares false
        against every threshold (e.g. NaN).
    """
    SEC_PER_MIN = 60
    very_early_threshold = 10 * SEC_PER_MIN
    very_late_threshold = -10 * SEC_PER_MIN

    # Inclusive lower bound of each bucket, ordered from earliest to latest.
    # The first bound the value reaches decides the category.
    lower_bounds = (
        (5 * SEC_PER_MIN, "early"),
        (1 * SEC_PER_MIN, "little_early"),
        (-1 * SEC_PER_MIN, "ontime"),
        (-5 * SEC_PER_MIN, "little_late"),
        # This bucket used to return "little_late" as well, so "late" could
        # never be produced.
        (very_late_threshold, "late"),
    )

    if prediction_error > very_early_threshold:
        return "very_early"

    for lower_bound, category in lower_bounds:
        if prediction_error >= lower_bound:
            return category

    if prediction_error < very_late_threshold:
        return "very_late"

    # Only reachable when every comparison is False, i.e. NaN.
    return "unknown"

# Label every row with its prediction-error category. Only one column is
# needed, so map over that Series instead of `DataFrame.apply(axis=1)`, which
# builds a full row object for each of the rows.
daily_df = daily_df.assign(
    prediction_error_category = daily_df["avg_prediction_error_sec"].map(
        categorize_prediction_error
    )
)

In [6]:
# Number of daily stop-grain rows in each prediction-error category.
daily_df.prediction_error_category.value_counts()

ontime          460518
little_early    423497
little_late      30544
Name: prediction_error_category, dtype: int64

In [7]:
# Share of daily stop-grain rows in each prediction-error category
# (normalize=True returns proportions instead of counts).
daily_df.prediction_error_category.value_counts(normalize=True)

ontime          0.503541
little_early    0.463061
little_late     0.033398
Name: prediction_error_category, dtype: float64

In [11]:
# Inspect the frame, including the derived pct_tu_predictions_early_ontime and
# prediction_error_category columns (pandas truncates the display).
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours - re-run with Restart Kernel -> Run All before sharing.
daily_df

Unnamed: 0,key,stop_key,base64_url,service_date,stop_id,schedule_feed_key,avg_prediction_error_sec,n_tu_accurate_minutes,n_tu_complete_minutes,n_tu_minutes_available,...,n_predictions_early,n_predictions_ontime,n_predictions_late,n_tu_trips,pct_tu_predictions_early,pct_tu_predictions_ontime,pct_tu_predictions_late,pct_tu_accurate_minutes,pct_tu_predictions_early_ontime,prediction_error_category
0,5fd1b1a829400b57519951e7e5409643,3b95038c8989c9dcd4871c11fb7081be,aHR0cHM6Ly9hcGkuZ29zd2lmdC5seS9yZWFsLXRpbWUvbG...,2025-06-08,7373,8d9623a1823a27925b7e2f00e44fc5bb,39.183454,7113,9123,9234,...,17070,315,9951,90,0.62,0.01,0.36,0.77,0.63,ontime
1,eb6c24f24772649fe54ae88566015a8d,fff2473a01ee0e1f7f3913e16bba795f,aHR0cHM6Ly9hcGkuZ29zd2lmdC5seS9yZWFsLXRpbWUvbG...,2025-06-08,1222,8d9623a1823a27925b7e2f00e44fc5bb,48.851525,9192,10191,10302,...,21303,546,8709,106,0.7,0.02,0.28,0.89,0.72,ontime
2,e763549740cb2e9dcd92e223cded777c,4ed1b3a7ad8900a527682cf50cd17862,aHR0cHM6Ly9hcGkuZ29zd2lmdC5seS9yZWFsLXRpbWUvbG...,2025-06-08,11909,8d9623a1823a27925b7e2f00e44fc5bb,58.596248,6633,7812,7935,...,16893,405,6153,85,0.72,0.02,0.26,0.84,0.74,ontime
3,3250d0d32752750f0c689e96fa3fed8c,a0fb4a11017f532098331c399fd31ca0,aHR0cHM6Ly9hcGkuZ29zd2lmdC5seS9yZWFsLXRpbWUvbG...,2025-06-08,176170,8d9623a1823a27925b7e2f00e44fc5bb,60.681358,4689,5961,6048,...,12420,411,5049,63,0.69,0.02,0.28,0.78,0.71,little_early
4,321d084fe34caf6494c5853b3edfe837,20de800adb403d08b883ff758ad00abf,aHR0cHM6Ly9hcGkuZ29zd2lmdC5seS9yZWFsLXRpbWUvbG...,2025-06-08,6372,8d9623a1823a27925b7e2f00e44fc5bb,61.335705,2865,3123,3159,...,7293,120,1947,33,0.78,0.01,0.21,0.91,0.79,little_early
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
914554,04e2b18da6d6c95c32c99aedfa470214,53394a01951ad335893e06a1e9991654,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...,2025-06-13,57776,80b17fbe1735c5f7c348eda4fb45b99a,60.053590,10659,13230,13368,...,27147,177,12309,136,0.68,0.0,0.31,0.8,0.68,little_early
914555,08b8fa793a58da149152da63126706a1,9dd642a1d82e6702aec1f789aeccbde9,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...,2025-06-13,83745,80b17fbe1735c5f7c348eda4fb45b99a,79.291473,9468,13230,13416,...,28023,135,11604,136,0.7,0.0,0.29,0.71,0.70,little_early
914556,1ff00e9ade83e1bbd7403872ece6a21e,adbf64adc0d0af82d94a67a78a0eab74,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...,2025-06-13,52271,80b17fbe1735c5f7c348eda4fb45b99a,93.003835,10665,13590,13746,...,32853,261,7644,140,0.81,0.01,0.19,0.78,0.82,little_early
914557,d6e1160ae86140b1cbb6a3de3ea288ee,ffd7ba875d6c61a8f7b55755fd752311,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L3RyaXB1cG...,2025-06-13,61957,80b17fbe1735c5f7c348eda4fb45b99a,36.926193,6786,13935,14130,...,14415,324,10059,144,0.34,0.01,0.24,0.48,0.35,ontime


Let's align closely to Swiftly contract language for calculations.

Scatterplot leading up to window of arrival.

In [8]:
# Switch to the `weekday_stop_grain` entry under rt_schedule_models.
# NOTE: this rebinds FILE, which held the daily stop-grain entry earlier in
# the notebook.
FILE = RT_MSA_DICT.rt_schedule_models.weekday_stop_grain

# Read only the columns listed below, then apply the same
# prep_data.drop_outliers call (MIN_ERROR_SEC / MAX_ERROR_SEC) used for the
# daily table.
daytype_df = pd.read_parquet(
    f"{PREDICTIONS_GCS}{FILE}.parquet",   
    columns = ["name", "year", "month", "day_type", "stop_id", "avg_prediction_error_sec"]
).pipe(
    prep_data.drop_outliers,
    prep_data.MIN_ERROR_SEC, 
    prep_data.MAX_ERROR_SEC
)

# Label every row with its prediction-error category. Only one column is
# needed, so map over that Series instead of `DataFrame.apply(axis=1)`, which
# builds a full row object for each of the rows.
daytype_df = daytype_df.assign(
    prediction_error_category = daytype_df["avg_prediction_error_sec"].map(
        categorize_prediction_error
    )
)

In [9]:
# Number of day-type stop-grain rows in each prediction-error category.
daytype_df.prediction_error_category.value_counts()

ontime          925778
little_early    753024
little_late      48411
Name: prediction_error_category, dtype: int64

In [10]:
# Share of day-type stop-grain rows in each prediction-error category
# (normalize=True returns proportions instead of counts).
daytype_df.prediction_error_category.value_counts(normalize=True)

ontime          0.535995
little_early    0.435976
little_late     0.028028
Name: prediction_error_category, dtype: float64