## Round 1 
* Updating existing charts made by Tiffany. https://gtfs-digest--cal-itp-data-analyses.netlify.app/
* cd rt_segment_speeds && pip install altair_transform && pip install -r requirements.txt && cd ../_shared_utils && make setup_env

Links
* https://github.com/cal-itp/data-analyses/issues/1059
* https://docs.google.com/document/d/1I1WiqlmU06W6iLCi7cZQrOCLILkrEfABEkcU0Jys7f0/edit
* https://route-speeds--cal-itp-data-analyses.netlify.app/name_bay-area-511-muni-schedule/0__report__name_bay-area-511-muni-schedule
* https://posit-dev.github.io/great-tables/get-started/nanoplots.html
* https://docs.pola.rs/py-polars/html/reference/api/polars.from_pandas.html
* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/_rt_scheduled_utils.py
* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/_threshold_utils.py

In [40]:
import _section2_utils as section2
import _report_utils
import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd
from calitp_data_analysis import calitp_color_palette as cp
from great_tables import md
from IPython.display import HTML, Markdown, display
from segment_speed_utils.project_vars import RT_SCHED_GCS, SCHED_GCS
from shared_utils import rt_dates, rt_utils

alt.renderers.enable("html")
alt.data_transformers.enable("default", max_rows=None)
from typing import List, Union

from altair_transform.extract import extract_transform
from altair_transform.transform import visit
from altair_transform.utils import to_dataframe

In [41]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [42]:
name = 'SBMTD Schedule'

In [43]:
# %%capture_parameters
# name

### General Functions

In [44]:
red_green_yellow = ["#ec5d3b", "#fde18d", "#7cc665"]

### Data

In [45]:
df = section2.load_schedule_vp_metrics(name)

In [46]:
most_recent_date = df.service_date.max()

#### Unsure what these metrics mean
* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/segment_speed_utils/metrics.py
* avg_stop_miles: ??
* n_scheduled_trips: over the course of a day
* frequency: # of times the route runs per hour 
* pct_typology: 44% of the route fits into downtown local, the rest of the 54% falls under other categories??
* is_early: # of trips in that route that are early
* is_on_time: # of trips that are ontime.
* n_vp_trips: # of trips in total
* Difference between pct_rt_journey_atleast1vp and pct_sched_journey_atleast1vp?
* rt_sched_journey_ratio: how many times longer/shorter the actual trip took compared to the scheduled trip?

In [47]:
df.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,direction_id,time_period,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,frequency,service_date,minutes_atleast1_vp,minutes_atleast2_vp,total_rt_service_minutes,total_scheduled_service_minutes,total_vp,vp_in_shape,is_early,is_ontime,is_late,n_vp_trips,vp_per_minute,pct_in_shape,pct_rt_journey_atleast1_vp,pct_rt_journey_atleast2_vp,pct_sched_journey_atleast1_vp,pct_sched_journey_atleast2_vp,rt_sched_journey_ratio,avg_rt_service_minutes,sched_rt_category,speed_mph,name,route_long_name,route_short_name,route_combined_name,route_id,typology,schedule_source_record_id,base64_url,organization_source_record_id,organization_name,caltrans_district,rt_sched_journey_ratio_cat,frequency_cat,vp_per_minute_cat,ruler_100_pct,ruler_for_vp_per_min
132107,239f3baf3dd3b9e9464f66a777f9897d,0.0,all_day,15.0,0.18,54,2.25,2023-09-13,863,258,900.19,795.0,1136,925,0,45,8,53,1.26,81.0,96.0,29.0,100.0,32.0,1.13,16.98,schedule_and_vp,8.92,SBMTD Schedule,West Santa Barbara,1,1 West Santa Barbara,1,rapid,rectQfIeiKDBeJSAV,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZmVlZC56aXA=,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,05 - San Luis Obispo,Late by 1-25% of the scheduled time,2 trips/hour,<3 pings/minute,100,2
132108,239f3baf3dd3b9e9464f66a777f9897d,0.0,all_day,15.0,0.18,54,2.25,2023-09-13,863,258,900.19,795.0,1136,925,0,45,8,53,1.26,81.0,96.0,29.0,100.0,32.0,1.13,16.98,schedule_and_vp,8.92,SBMTD Schedule,West Santa Barbara,1,1 West Santa Barbara,1,rapid,rectQfIeiKDBeJSAV,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZmVlZC56aXA=,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,05 - San Luis Obispo,Late by 1-25% of the scheduled time,2 trips/hour,<3 pings/minute,100,2


In [48]:
df.time_period.value_counts()

all_day    4089
peak       4089
offpeak    2687
Name: time_period, dtype: int64

### Only use `schedule_and_vp` for charts!!

In [49]:
df_sched_vp_both = df[df.sched_rt_category == "schedule_and_vp"]

In [50]:
df_sched_vp_both.shape

(9165, 44)

#### One Route

In [51]:
df_sched_vp_both.route_combined_name.unique()

array(['1 West Santa Barbara', '12X Goleta Express', '14 Montecito',
       '15X SBCC/UCSB Express', '17 Lower West/SBCC',
       '19X Carp SBCC Express', '2 East Santa Barbara', '20 Carpinteria',
       '2010 Alpha Resource Center', '23 El Encanto Heights',
       '2410 La Colina Jr. High', '2420 La Colina Jr. High',
       '2430 La Colina Jr. High', '25 Ellwood',
       '2510 San Marcos High School', '2520 San Marcos High School',
       '2530 San Marcos High School', '2540 San Marcos High School',
       '2610 Goleta Valley Jr. High', '2620 Goleta Valley Jr. High',
       '2630 Goleta Valley Jr. High', '2650 Goleta Valley Jr. High',
       '27 Isla Vista Shuttle', '2710 Dos Pueblos High School',
       '2720 Dos Pueblos High School', '2730 Dos Pueblos High School',
       '2740 Dos Pueblos High School', '28 UCSB Shuttle', '3 Oak Park',
       '4 Mesa/SBCC', '5 Mesa/La Cumbre', '6 Goleta',
       '7 County Health/Fairview', '2660 Goleta Valley Jr. High',
       '2750 Dos Pueblos High

In [52]:
# Has very little data: 652 Skyline High - Elmhurst Bay Area 511 AC Transit Schedule
# Has a lot of data: "18 Solano - Shattuck - MLK Jr."

In [53]:
one_route = df_sched_vp_both.loc[
    df_sched_vp_both.route_combined_name == '15X SBCC/UCSB Express'
]

In [54]:
one_route.shape

(216, 44)

In [55]:
one_route.sample()

Unnamed: 0,schedule_gtfs_dataset_key,direction_id,time_period,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,frequency,service_date,minutes_atleast1_vp,minutes_atleast2_vp,total_rt_service_minutes,total_scheduled_service_minutes,total_vp,vp_in_shape,is_early,is_ontime,is_late,n_vp_trips,vp_per_minute,pct_in_shape,pct_rt_journey_atleast1_vp,pct_rt_journey_atleast2_vp,pct_sched_journey_atleast1_vp,pct_sched_journey_atleast2_vp,rt_sched_journey_ratio,avg_rt_service_minutes,sched_rt_category,speed_mph,name,route_long_name,route_short_name,route_combined_name,route_id,typology,schedule_source_record_id,base64_url,organization_source_record_id,organization_name,caltrans_district,rt_sched_journey_ratio_cat,frequency_cat,vp_per_minute_cat,ruler_100_pct,ruler_for_vp_per_min
132834,239f3baf3dd3b9e9464f66a777f9897d,1.0,peak,37.2,0.2,9,1.12,2023-10-11,292,58,317.15,354.0,357,356,4,5,0,9,1.13,100.0,92.0,18.0,82.0,16.0,0.9,35.24,schedule_and_vp,26.67,SBMTD Schedule,SBCC/UCSB Express,15X,15X SBCC/UCSB Express,15X,express,rectQfIeiKDBeJSAV,aHR0cHM6Ly9zYm10ZC5nb3YvZ29vZ2xlX3RyYW5zaXQvZmVlZC56aXA=,recswCrw6a6htmXJ4,Santa Barbara Metropolitan Transit District,05 - San Luis Obispo,Early,<1 trip/hour,<3 pings/minute,100,2


In [56]:
one_route_all_day = one_route.loc[one_route.time_period == "all_day"]

In [57]:
one_route_all_day.service_date.value_counts()

2023-09-13    6
2023-10-11    6
2023-12-13    6
2024-01-17    6
2024-02-14    6
2024-03-13    6
2023-03-15    6
2023-04-12    6
2023-05-17    6
2023-06-14    6
2023-07-12    6
2023-08-15    6
Name: service_date, dtype: int64

#### Avg Scheduled Minutes (Done)
* How come the average_scheduled_minutes is a lot shorter than total_rt_service_minutes and total_scheduled_service_minutes?
* Kind of a boring chart? Should compare actual run time compared to service minutes?

In [58]:
def grouped_bar_chart(
    df: pd.DataFrame,
    color_col: str,
    y_col: str,
    offset_col: str,
    title: str,
    subtitle: str,
):
    tooltip_cols = [
        "direction_id",
        "time_period",
        "route_combined_name",
        "organization_name",
        "service_date",
        color_col,
        y_col,
    ]
    chart = (
        alt.Chart(df)
        .mark_bar(size=10)
        .encode(
            x=alt.X(
                "yearmonthdate(service_date):O",
                title=["Grouped by Direction ID", "Date"],
                axis=alt.Axis(format="%b %Y"),
            ),
            y=alt.Y(f"{y_col}:Q", title=_report_utils.labeling(y_col)),
            xOffset=alt.X(f"{offset_col}:N", title=_report_utils.labeling(offset_col)),
            color=alt.Color(
                f"{color_col}:N",
                title=_report_utils.labeling(color_col),
                scale=alt.Scale(
                    range=_report_utils.red_green_yellow,
                ),
            ),
            tooltip=tooltip_cols,
        )
    )
    chart = (chart).properties(
        title={
            "text": [title],
            "subtitle": [subtitle],
        },
        width=500,
        height=300,
    )

    return chart

In [59]:
one_route[one_route.time_period == "all_day"][['service_date','direction_id','avg_scheduled_service_minutes']].sort_values(by = ['service_date','direction_id'])

Unnamed: 0,service_date,direction_id,avg_scheduled_service_minutes
925781,2023-03-15,1.0,37.2
925782,2023-03-15,1.0,37.2
925783,2023-03-15,1.0,37.2
925784,2023-03-15,1.0,37.2
925785,2023-03-15,1.0,37.2
925786,2023-03-15,1.0,37.2
925787,2023-04-12,1.0,37.2
925788,2023-04-12,1.0,37.2
925789,2023-04-12,1.0,37.2
925790,2023-04-12,1.0,37.2


In [60]:
section2.grouped_bar_chart(
    df=one_route[one_route.time_period == "all_day"].drop_duplicates(),
    color_col="direction_id",
    y_col="avg_scheduled_service_minutes",
    offset_col="direction_id",
    title="Average Scheduled Minutes",
    subtitle="The average minutes a trip is scheduled to run.",
)

#### Timeliness #2 (Done)

In [61]:
timeliness = section2.timeliness_trips(one_route)

In [62]:
timeliness.sample()

Unnamed: 0,service_date,organization_name,route_combined_name,time_period,direction_id,variable,value
112,2023-03-15,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,peak,1.0,is_early,8


In [63]:
timeliness.loc[timeliness.service_date == "2023-04-12"].sort_values(by = ['service_date','direction_id','time_period'])

Unnamed: 0,service_date,organization_name,route_combined_name,time_period,direction_id,variable,value
78,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_early,1
79,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_early,1
80,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_early,1
81,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_early,1
82,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_early,1
83,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_early,1
222,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_ontime,4
223,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_ontime,4
224,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_ontime,4
225,2023-04-12,Santa Barbara Metropolitan Transit District,15X SBCC/UCSB Express,offpeak,1.0,is_ontime,4


In [64]:
section2.base_facet_chart(
    timeliness.loc[timeliness.direction_id == 0].drop_duplicates(),
    "value",
    "variable",
    "time_period",
    "Breakdown of Trips by Categories for Direction 0",
    "Trips broken down by VP Only (found only in real-time data), Schedule Only (found only in schedule data), or both.",
).interactive()

#### Total Scheduled Trips (Don't Use, boring)

In [65]:
# IF we take away direction, see how many times a route is scheduled to run
total_scheduled_trip = (
    one_route.groupby(["service_date", "time_period"])
    .agg({"n_scheduled_trips": "mean"})
    .reset_index()
)

In [66]:
total_scheduled_trip.n_scheduled_trips = total_scheduled_trip.n_scheduled_trips / 2

In [67]:
total_scheduled_trip.head()

Unnamed: 0,service_date,time_period,n_scheduled_trips
0,2023-03-15,all_day,7.5
1,2023-03-15,offpeak,3.0
2,2023-03-15,peak,4.5
3,2023-04-12,all_day,7.5
4,2023-04-12,offpeak,3.0


In [68]:
one_route.groupby(["service_date", "direction_id", "time_period"]).agg(
    {"n_scheduled_trips": "max"}
).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n_scheduled_trips
service_date,direction_id,time_period,Unnamed: 3_level_1
2023-03-15,1.0,all_day,15
2023-03-15,1.0,offpeak,6
2023-03-15,1.0,peak,9
2023-04-12,1.0,all_day,15
2023-04-12,1.0,offpeak,6


In [69]:
section2.grouped_bar_chart(
    df=one_route.loc[one_route.time_period != "all_day"].drop_duplicates(),
    color_col="time_period",
    y_col="n_scheduled_trips",
    offset_col="direction_id",
    title="Total Scheduled Trips",
    subtitle="How many times per day is this route is scheduled to run in one particular direction.",
)

#### Frequency (Done)
* Maybe shouldn't be a chart since there doesn't seem to be a lot of data for this across a lot of the routes?
* What does frequency mean?
* Simplify down to not take direction_id into consideration?

In [70]:
section2.frequency_chart(one_route)

#### Speed MPH (Done)
* Needs a different type of chart.

In [71]:
section2.base_facet_line(
    one_route,
    "speed_mph",
    "Average Speed",
    "The average miles per hour the bus travels by direction and time of day.",
)

#### VP per Minute (WIP)
* Heatmap too confusing & detailed

In [72]:
def add_background():
    # Sample data
    data = pd.DataFrame(
        {
            "x": range(9),
            "y": [
                0,
                0.5,
                1.5,
                2.5,
                3.5,
                2,
                1,
                3,
                4,
            ],
        }
    )

    # Background rectangle for color bands
    bg_chart = (
        alt.Chart(data)
        .mark_rect()
        .encode(
            y=alt.Y("y:Q", axis=None),
            y2="y2:Q",
            color=alt.Color("y:Q", scale=alt.Scale(range=red_green_yellow)),
            opacity=alt.value(0.2),  # Adjust opacity as needed
        )
        .transform_calculate(y2="datum.y + 1")
    )

    return bg_chart

In [73]:
section2.base_facet_with_ruler_chart(
                one_route.drop_duplicates(),
                "vp_per_minute",
                "ruler_for_vp_per_min",
                "Vehicle Positions per Minute",
                "Trips should have 2+ vehicle positions per minute.",
            )

In [74]:
blue_palette = ["#B9D6DF", "#2EA8CE", "#0B405B"]

#### Spatial Accuracy (Done)
* Multiple it by 100? Should this be rounded or not?

In [75]:
one_route.loc[one_route.time_period == "all_day"][['service_date','pct_in_shape']]

Unnamed: 0,service_date,pct_in_shape
132755,2023-09-13,100.0
132756,2023-09-13,100.0
132757,2023-09-13,100.0
132758,2023-09-13,100.0
132759,2023-09-13,100.0
132760,2023-09-13,100.0
132761,2023-10-11,100.0
132762,2023-10-11,100.0
132763,2023-10-11,100.0
132764,2023-10-11,100.0


In [76]:
section2.base_facet_with_ruler_chart(
    one_route.loc[one_route.time_period == "all_day"].drop_duplicates(),
    "pct_in_shape",
    "ruler_100_pct",
    "Spatial Accuracy",
    "The percentage of vehicle positions that fall within the static scheduled route shape reflects the accuracy of the spatial, realtime data.",
)

#### % RT journey with 1+/2+ vp (goal line = 100%) - use all_day, one chart shared for 1+ and 2+ (Done need subtitle)

In [77]:
def pct_vp_journey(df: pd.DataFrame, col1: str, col2: str) -> pd.DataFrame:
    to_keep = [
        "service_date",
        "organization_name",
        "direction_id",
        col1,
        col2,
        "route_combined_name",
        "time_period",
        "route_id",
        "ruler_100_pct",
    ]
    df2 = df[to_keep]

    df3 = df2.melt(
        id_vars=[
            "service_date",
            "organization_name",
            "route_combined_name",
            "direction_id",
            "time_period",
            "route_id",
            "ruler_100_pct",
        ],
        value_vars=[col1, col2],
    )

    return df3

In [78]:
pct_rt = pct_vp_journey(
    one_route, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
)

#### % schedule journey with 1+/2+ vp (goal line = 100%) - use all_day, one chart shared for 1+ and 2+ (Done need subtitle)

In [79]:
schd_rt = pct_vp_journey(
    one_route, "pct_sched_journey_atleast1_vp", "pct_sched_journey_atleast2_vp"
)

In [80]:
""" base_facet_circle(
    schd_rt.loc[schd_rt.time_period == "all_day"],
    "value",
    "ruler_100_pct",
    "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",
    "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
)"""

' base_facet_circle(\n    schd_rt.loc[schd_rt.time_period == "all_day"],\n    "value",\n    "ruler_100_pct",\n    "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",\n    "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",\n)'

#### Number of trips by sched_vp_category (/operator_schedule_rt_category)
* Why is the crosswalk missing SBMTD schedule??

In [81]:
op_sched_rt_cat = pd.read_parquet(
    f"{RT_SCHED_GCS}digest/operator_schedule_rt_category.parquet"
)

In [82]:
list(df.schedule_gtfs_dataset_key.unique())

['239f3baf3dd3b9e9464f66a777f9897d', 'de792182088eecc3d5c0bd3f1df62965']

In [83]:
test = section2.load_operator_schedule_rt_category("de792182088eecc3d5c0bd3f1df62965")

In [84]:
op_sched_rt_cat.schedule_gtfs_dataset_key.nunique()

272

In [85]:
def basic_bar_chart(
    df: pd.DataFrame, y_col: str, color_col: str, title: str, subtitle: str
):
    chart = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(
                f"yearmonthdate(service_date):O",
                title="Service Date",
                axis=alt.Axis(format="%b %Y"),
            ),
            y=alt.Y(f"{y_col}:Q", title=labeling(y_col)),
            color=alt.Color(
                f"{color_col}:N",
                scale=alt.Scale(
                    range=cp.CALITP_SEQUENTIAL_COLORS,
                ),
            ),
            tooltip=df.columns.tolist(),
        )
    ).properties(
        title={
            "text": title,
            "subtitle": subtitle,
        },
        width=500,
        height=300,
    )
    return chart

In [86]:
"""basic_bar_chart(
    op_sched_rt_cat,
    "n_trips",
    "sched_rt_category",
    "Trips that were found in GTFS, Schedule, or Both Data Sources",
    "",
) """

'basic_bar_chart(\n    op_sched_rt_cat,\n    "n_trips",\n    "sched_rt_category",\n    "Trips that were found in GTFS, Schedule, or Both Data Sources",\n    "",\n) '

#### Text

In [87]:
def create_text_table(df: pd.DataFrame, direction_id: str):

    df = (
        df.loc[df.direction_id == direction_id].drop_duplicates().reset_index(drop=True)
    )

    if len(df) == 0:
        text_chart = create_data_unavailable_chart()
        return text_chart

    else:
        df2 = df.melt(
            id_vars=[
                "route_combined_name",
                "direction_id",
            ],
            value_vars=[
                "avg_scheduled_service_minutes",
                "avg_stop_miles",
                "n_scheduled_trips",
                "sched_rt_category",
                "peak_avg_speed",
                "peak_scheduled_trips",
                "peak_hourly_freq",
                "offpeak_avg_speed",
                "offpeak_scheduled_trips",
                "offpeak_hourly_freq",
            ],
        )
        # Create a decoy column to center all the text
        df2["Zero"] = 0

        df2.variable = df2.variable.str.replace("_", " ").str.title()
        df2 = df2.sort_values(by=["direction_id"]).reset_index(drop=True)
        df2["combo_col"] = df2.variable.astype(str) + ": " + df2.value.astype(str)
        text_chart = (
            alt.Chart(df2)
            .mark_text()
            .encode(x=alt.X("Zero:Q", axis=None), y=alt.Y("combo_col", axis=None))
        )

        text_chart = text_chart.encode(text="combo_col:N").properties(
            title=f"Route Statistics for Direction {direction_id}",
            width=500,
            height=300,
        )
        return text_chart

In [88]:
table_df = section2.route_stats(df_sched_vp_both)

#### Putting it all together

In [89]:
def filtered_route(
    df: pd.DataFrame,
) -> alt.Chart:
    """
    https://stackoverflow.com/questions/58919888/multiple-selections-in-altair
    """
    routes_list = df["route_combined_name"].unique().tolist()

    initialize_first_route = sorted(routes_list)[0]

    # Add the default value to the options list
    options = [initialize_first_route] + sorted(routes_list)

    route_dropdown = alt.binding_select(
        options=options,
        name="Routes",
    )

    # Column that controls the bar charts
    route_selector = alt.selection_single(
        fields=["route_combined_name"],
        bind=route_dropdown,
    )

    # Data
    # Filter for only schedule and vp
    df_sched_vp_both = df[df.sched_rt_category == "schedule_and_vp"].reset_index(
        drop=True
    )

    # Filter for only rows categorized as found in schedule and vp and all_day
    all_day = df_sched_vp_both.loc[
        df_sched_vp_both.time_period == "all_day"
    ].reset_index(drop=True)

    # Create route stats table for the text tables
    route_stats_df = section2.route_stats(df)

    # Manipulate the df for some of the metrics
    timeliness_df = section2.timeliness_trips(df_sched_vp_both)
    rt_journey_vp = pct_vp_journey(
        all_day, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
    )
    sched_journey_vp = pct_vp_journey(
        all_day, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
    )

    # Charts
    avg_scheduled_min = (
        grouped_bar_chart(
            df=all_day,
            color_col="direction_id",
            y_col="avg_scheduled_service_minutes",
            offset_col="direction_id",
            title="Average Scheduled Minutes",
            subtitle="The average minutes a trip is scheduled to run.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    timeliness_trips_dir_0 = (
        (
            base_facet_chart(
                timeliness_df.loc[timeliness_df.direction_id == 0],
                "value",
                "variable",
                "time_period",
                "Breakdown of Trips by Categories for Direction 0",
                "Categorizing whether a trip is early, late, or ontime. A trip is on time if it arrives 5 minutes later or earlier than scheduled.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    timeliness_trips_dir_1 = (
        (
            base_facet_chart(
                timeliness_df.loc[timeliness_df.direction_id == 1],
                "value",
                "variable",
                "time_period",
                "Breakdown of Trips by Categories for Direction 1",
                "Categorizing whether a trip is early, late, or ontime. A trip is on time if it arrives 5 minutes later or earlier than scheduled.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    frequency = (
        frequency_chart(df_sched_vp_both)
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    speed = (
        base_facet_line(
            df_sched_vp_both,
            "speed_mph",
            "Average Speed",
            "The average miles per hour the bus travels by direction and time of day.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    vp_per_min = (
        (
            base_facet_with_ruler_chart(
                all_day,
                "vp_per_minute",
                "ruler_for_vp_per_min",
                "Vehicle Positions per Minute",
                "Trips should have 2+ vehicle positions per minute.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    rt_vp_per_min = (
        base_facet_circle(
            rt_journey_vp,
            "value",
            "ruler_100_pct",
            "Percentage of Realtime Trips with 1+ and 2+ Vehicle Positions",
            "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    sched_vp_per_min = (
        base_facet_circle(
            sched_journey_vp,
            "value",
            "sched_journey_vp",
            "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",
            "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    spatial_accuracy = (
        base_facet_with_ruler_chart(
            all_day,
            "pct_in_shape",
            "ruler_100_pct",
            "Spatial Accuracy",
            "The percentage of vehicle positions that fall within the static scheduled route shape reflects the accuracy of the spatial, realtime data.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    text_dir0 = (
        (create_text_table(route_stats_df, 0))
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    text_dir1 = (
        create_text_table(route_stats_df, 1)
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    chart_list = [
        avg_scheduled_min,
        timeliness_trips_dir_0,
        timeliness_trips_dir_1,
        frequency,
        speed,
        vp_per_min,
        rt_vp_per_min,
        sched_vp_per_min,
        spatial_accuracy,
        text_dir0,
        text_dir1,
    ]

    chart = alt.vconcat(*chart_list).properties(
        resolve=alt.Resolve(
            scale=alt.LegendResolveMap(color=alt.ResolveMode("independent"))
        )
    )
    return chart

In [90]:
filtered_route(df_sched_vp_both.sample(1000))



NameError: name 'base_facet_chart' is not defined