In [1]:
import _charts
import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd
from calitp_data_analysis import calitp_color_palette as cp
from great_tables import md
from IPython.display import HTML, Markdown, display
from segment_speed_utils.project_vars import RT_SCHED_GCS
from shared_utils import rt_dates, rt_utils

# Render Altair charts through the "html" renderer.
alt.renderers.enable("html")
# max_rows=None turns off the row limit of the default data transformer,
# so charts can be built from frames of any length.
alt.data_transformers.enable("default", max_rows=None)
from typing import List, Union

In [2]:
# Notebook-wide pandas display settings, all set through pd.set_option.
# No cap on the number of rows shown or on the width of a cell's text.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
# Show up to 100 columns and print floats with two decimal places.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)

### General Transit Feed Specification Digest
* We are already working on comparing real-time data with scheduled data.
* We can get some insights that can be used to improve the experience of riding transit.
* This example uses San Francisco Muni's 30 Stockton, a popular route that runs from Caltrain (a transit hub) through Chinatown/Union Square to the Presidio. Only ONE direction of travel is shown.
* Other insights can be derived from GTFS data, but let's focus on two examples.

In [3]:
# Value matched against the "name" column when the digest parquet is read.
name = "Bay Area 511 Muni Schedule"

In [6]:
# Digest table read from under RT_SCHED_GCS
# (presumably calitp-analytics-data/data-analyses/rt_vs_schedule/digest — confirm).
# The nested filter list is a single AND-group: only rows matching both the
# feed `name` and the "30 STOCKTON" route are loaded.
df = pd.read_parquet(
    f"{RT_SCHED_GCS}digest/schedule_vp_metrics.parquet",
    filters=[[("name", "==", name), ("route_combined_name", "==", "30 STOCKTON")]],
)

In [7]:
def frequency_tags(row):
    """Bucket a row's ``frequency`` (trips per hour) into a display label.

    Parameters
    ----------
    row
        Any object exposing a numeric ``frequency`` attribute, e.g. the row
        handed over by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    str
        ``"<1 trip/hour"`` for values below 1, ``"1 trip/hour"`` for [1, 2),
        ``"2 trips/hour"`` for [2, 3), ``"3+ trips/hour"`` for 3 and above,
        and ``"No Info"`` when the value is NaN.
    """
    frequency = row.frequency
    # The first cutoff must be 1, not 2: with `< 2` every value in [1, 2) was
    # labeled "<1 trip/hour" and the "1 trip/hour" branch could never run.
    if frequency < 1:
        return "<1 trip/hour"
    elif frequency < 2:
        return "1 trip/hour"
    elif frequency < 3:
        return "2 trips/hour"
    elif frequency >= 3:
        return "3+ trips/hour"
    else:
        # NaN fails every comparison above and lands here.
        return "No Info"


# Label every row; axis=1 passes frequency_tags one row at a time.
df["frequency_cat"] = df.apply(frequency_tags, axis=1)

In [10]:
def rt_sched_journey_ratio_tags(row):
    """Bucket a row's ``rt_sched_journey_ratio`` into a timeliness label.

    A ratio of 1.13 means the trip took 13% longer than its scheduled time.

    Parameters
    ----------
    row
        Any object exposing a numeric ``rt_sched_journey_ratio`` attribute,
        e.g. the row handed over by ``DataFrame.apply(..., axis=1)``.

    Returns
    -------
    str
        One label per band: below 1 is "Early", [1, 1.1) is "On Time",
        [1.1, 1.26) is "Late by 10-25%", [1.26, 1.51) is "Late by 26-50%",
        [1.51, 2) is "Late by 50%+", 2 and above is "more than 2x", and NaN
        is "No Info".
    """
    ratio = row.rt_sched_journey_ratio
    if ratio < 1:
        return "Early"
    elif ratio < 1.1:
        return "On Time"
    elif ratio < 1.26:
        # This band used to run up to 1.51 under the label "Late by 1-50%",
        # which made the 26-50% branch below unreachable. It now stops at
        # 1.26 and is labeled after its own range.
        return "Late by 10-25% of the sch. time"
    elif ratio < 1.51:
        return "Late by 26-50% of the sch. time"
    elif ratio < 2:
        return "Late by 50%+ of the sch. time"
    elif ratio >= 2:
        return "Trip took more than 2x the sch. time"
    else:
        # NaN fails every comparison above and lands here.
        return "No Info"


# Label every row; axis=1 passes rt_sched_journey_ratio_tags one row at a time.
df["rt_sched_journey_ratio_cat"] = df.apply(rt_sched_journey_ratio_tags, axis=1)

#### How often does my bus pass by? 

In [11]:
# Frequency chart for direction 0 only; rows whose time_period is "all_day"
# are excluded before charting.
_charts.grouped_bar_chart(
    df.loc[(df.direction_id == 0) & (df.time_period != "all_day")],
    "time_period",
    "frequency",
    "time_period",
    "Frequency of Route in Direction 0",
    "Frequency tracks the number of times per hour this route goes by direction and time period.",
)

In [12]:
def timeliness_trips(df: pd.DataFrame) -> pd.DataFrame:
    """Reshape the early / on-time / late columns into long format.

    Rows whose ``time_period`` is ``"all_day"`` are dropped. The three
    timeliness columns are then melted into a ``variable`` / ``value`` pair,
    keeping service date, organization, route, time period and direction as
    identifier columns.

    Parameters
    ----------
    df
        Frame holding the identifier columns, ``is_early``, ``is_ontime``,
        ``is_late`` and ``n_vp_trips``.

    Returns
    -------
    pd.DataFrame
        One row per input row and timeliness column. ``n_vp_trips`` is
        selected from the input but is not part of the result.
    """
    identifier_cols = [
        "service_date",
        "organization_name",
        "route_combined_name",
        "time_period",
        "direction_id",
    ]
    timeliness_cols = ["is_early", "is_ontime", "is_late"]

    return (
        df[identifier_cols + timeliness_cols + ["n_vp_trips"]]
        .loc[lambda frame: frame["time_period"] != "all_day"]
        .reset_index(drop=True)
        .melt(id_vars=identifier_cols, value_vars=timeliness_cols)
    )

In [13]:
# Long-format timeliness table (a variable/value pair per row) built from df.
df_timeliness = timeliness_trips(df)

#### How on time is this route?

In [15]:
# Bar chart of the melted timeliness values for direction 0: one bar group per
# service date, colored by variable (is_early / is_ontime / is_late), with one
# facet column per time_period.
(
    alt.Chart(df_timeliness.loc[df_timeliness.direction_id == 0])
    .mark_bar(size=15)
    .encode(
        x=alt.X(
            "yearmonthdate(service_date):O",
            title=["Service Date"],
            # Axis labels are formatted as month + year, so the day is not shown.
            axis=alt.Axis(labelAngle=-45, format="%b %Y"),
        ),
        y=alt.Y(
            "value:Q",
            title=_charts.labeling("value"),
            scale=alt.Scale(
                # The domain comes from the unfiltered df_timeliness (all
                # directions), padded by 10 at the top.
                # NOTE(review): bars that share a date are stacked by the
                # Vega-Lite default, while this domain is based on individual
                # values — confirm stacked totals stay inside the axis.
                domain=[
                    (df_timeliness.value.min()),
                    (df_timeliness.value.max() + 10),
                ]
            ),
        ),
        color=alt.Color(
            "variable:N",
            title=_charts.labeling("variable"),
            scale=alt.Scale(range=_charts.blue_palette),
        ),
        # Every column of the melted frame is shown in the tooltip.
        tooltip=df_timeliness.columns.tolist(),
    )
).properties(width=300, height=300).facet(
    column=alt.Column("time_period:N", title=_charts.labeling("Timeliness of Trips")),
)

In [17]:
# NOTE(review): the heatmap call below is disabled by wrapping it in a bare
# string literal, so running this cell only echoes the string and draws nothing.
# TODO: re-enable the call or delete the cell.
"""_charts.heatmap(
    df.loc[df.direction_id == 0],
    "rt_sched_journey_ratio_cat",
    "Realtime vs. Scheduled Trip Times",
    "Dividing total trip times by total scheduled trip times gives an idea of how on schedule a trip on this route typically is.",
    "A ratio of 1.13 indicates a trip is late 13% over its scheduled time.",
    "",
)"""

'_charts.heatmap(\n    df.loc[df.direction_id == 0],\n    "rt_sched_journey_ratio_cat",\n    "Realtime vs. Scheduled Trip Times",\n    "Dividing total trip times by total scheduled trip times gives an idea of how on schedule a trip on this route typically is.",\n    "A ratio of 1.13 indicates a trip is late 13% over its scheduled time.",\n    "",\n)'