## Round 1 
* Updating existing charts made by Tiffany. https://gtfs-digest--cal-itp-data-analyses.netlify.app/
* cd rt_segment_speeds && pip install altair_transform && pip install -r requirements.txt && cd ../_shared_utils && make setup_env

Links
* https://github.com/cal-itp/data-analyses/issues/1059
* https://docs.google.com/document/d/1I1WiqlmU06W6iLCi7cZQrOCLILkrEfABEkcU0Jys7f0/edit
* https://route-speeds--cal-itp-data-analyses.netlify.app/name_bay-area-511-muni-schedule/0__report__name_bay-area-511-muni-schedule
* https://posit-dev.github.io/great-tables/get-started/nanoplots.html
* https://docs.pola.rs/py-polars/html/reference/api/polars.from_pandas.html
* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/_rt_scheduled_utils.py
* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/_threshold_utils.py

In [69]:
import _section2_utils as section2
import _report_utils
import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd
from calitp_data_analysis import calitp_color_palette as cp
from great_tables import md
from IPython.display import HTML, Markdown, display
from segment_speed_utils.project_vars import RT_SCHED_GCS, SCHED_GCS
from shared_utils import rt_dates, rt_utils, catalog_utils

alt.renderers.enable("html")
alt.data_transformers.enable("default", max_rows=None)
from typing import List, Union

from altair_transform.extract import extract_transform
from altair_transform.transform import visit
from altair_transform.utils import to_dataframe

In [70]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [71]:
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [72]:
name = 'Bay Area 511 Muni Schedule'

### Original File 

In [73]:
schd_vp_url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.route_schedule_vp}.parquet"
    

In [74]:
og = pd.read_parquet(schd_vp_url, filters=[[("sched_rt_category", "==", "schedule_and_vp")]],
                        columns = ["organization_name", "caltrans_district", "name"]
    ).dropna(
        subset="caltrans_district"
    ).sort_values(["caltrans_district", "organization_name"]).drop_duplicates()

In [75]:
og.shape

(91, 3)

In [76]:
# og

### Data

In [77]:
df = section2.load_schedule_vp_metrics(name)

In [78]:
most_recent_date = df.service_date.max()

In [79]:
df.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,direction_id,time_period,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,frequency,service_date,minutes_atleast1_vp,minutes_atleast2_vp,total_rt_service_minutes,total_scheduled_service_minutes,total_vp,vp_in_shape,is_early,is_ontime,is_late,n_vp_trips,vp_per_minute,pct_in_shape,pct_rt_journey_atleast1_vp,pct_rt_journey_atleast2_vp,pct_sched_journey_atleast1_vp,pct_sched_journey_atleast2_vp,rt_sched_journey_ratio,avg_rt_service_minutes,sched_rt_category,speed_mph,name,route_long_name,route_short_name,route_combined_name,route_id,typology,schedule_source_record_id,base64_url,organization_source_record_id,organization_name,caltrans_district,rt_sched_journey_ratio_cat,frequency_cat,vp_per_minute_cat,ruler_100_pct,ruler_for_vp_per_min
488746,7cc0cb1871dfd558f11a2885c145d144,0.0,all_day,41.33,0.12,151,6.29,2023-04-12,7816,7708,12084.08,6194.0,23106,21485,4,28,118,150,1.91,93.0,65.0,64.0,100.0,100.0,1.95,80.56,schedule_and_vp,6.91,Bay Area 511 Muni Schedule,CALIFORNIA,1,1 CALIFORNIA,1,downtown_local,recHD22phgJs34JHP,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,Late by 50%+ of the scheduled time,3+ trips/hour,<3 pings/minute,100,2
488747,7cc0cb1871dfd558f11a2885c145d144,0.0,all_day,41.33,0.12,151,6.29,2023-04-12,7816,7708,12084.08,6194.0,23106,21485,4,28,118,150,1.91,93.0,65.0,64.0,100.0,100.0,1.95,80.56,schedule_and_vp,6.91,Bay Area 511 Muni Schedule,CALIFORNIA,1,1 CALIFORNIA,1,downtown_local,recHD22phgJs34JHP,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,Late by 50%+ of the scheduled time,3+ trips/hour,<3 pings/minute,100,2


In [80]:
df.time_period.value_counts()

all_day    16427
offpeak    15939
peak       14409
Name: time_period, dtype: int64

### Only use `schedule_and_vp` for charts!!

In [81]:
df_sched_vp_both = df[df.sched_rt_category == "schedule_and_vp"]

In [82]:
df_sched_vp_both.shape

(43342, 44)

#### One Route

In [83]:
df_sched_vp_both.route_combined_name.unique()

array(['1 CALIFORNIA', '12 FOLSOM-PACIFIC', '14 MISSION',
       '14R MISSION RAPID', '18 46TH AVENUE', '19 POLK',
       '1X CALIFORNIA EXPRESS', '2 SUTTER', '21 HAYES', '22 FILLMORE',
       '23 MONTEREY', '24 DIVISADERO', '27 BRYANT', '28 19TH AVENUE',
       '28R 19TH AVENUE RAPID', '29 SUNSET', '30 STOCKTON', '31 BALBOA',
       '33 ASHBURY-18TH ST', '35 EUREKA', '36 TERESITA', '37 CORBETT',
       '38 GEARY', '38R GEARY RAPID', '39 COIT', '43 MASONIC',
       "44 O'SHAUGHNESSY", '45 UNION-STOCKTON', '48 QUINTARA-24TH STREET',
       '49 VAN NESS-MISSION', '5 FULTON', '52 EXCELSIOR', '54 FELTON',
       '55 DOGPATCH', '56 RUTLAND', '57 PARKMERCED', '58 LAKE MERCED',
       '5R FULTON RAPID', '6 HAIGHT-PARNASSUS', '66 QUINTARA',
       '67 BERNAL HEIGHTS', '7 HAIGHT-NORIEGA', '714 BART EARLY BIRD',
       '8 BAYSHORE', '8AX BAYSHORE A EXPRESS', '8BX BAYSHORE B EXPRESS',
       '9 SAN BRUNO', '90 SAN BRUNO OWL', '9R SAN BRUNO RAPID',
       'CA CALIFORNIA STREET CABLE CAR', 'F MARKE

In [84]:
# Has very little data: 652 Skyline High - Elmhurst Bay Area 511 AC Transit Schedule
# Has a lot of data: "18 Solano - Shattuck - MLK Jr."

In [85]:
one_route = df_sched_vp_both.loc[
    df_sched_vp_both.route_combined_name == 'KBUS INGLESIDE BUS'
]

In [86]:
one_route.shape

(484, 44)

In [87]:
one_route.sample()

Unnamed: 0,schedule_gtfs_dataset_key,direction_id,time_period,avg_scheduled_service_minutes,avg_stop_miles,n_scheduled_trips,frequency,service_date,minutes_atleast1_vp,minutes_atleast2_vp,total_rt_service_minutes,total_scheduled_service_minutes,total_vp,vp_in_shape,is_early,is_ontime,is_late,n_vp_trips,vp_per_minute,pct_in_shape,pct_rt_journey_atleast1_vp,pct_rt_journey_atleast2_vp,pct_sched_journey_atleast1_vp,pct_sched_journey_atleast2_vp,rt_sched_journey_ratio,avg_rt_service_minutes,sched_rt_category,speed_mph,name,route_long_name,route_short_name,route_combined_name,route_id,typology,schedule_source_record_id,base64_url,organization_source_record_id,organization_name,caltrans_district,rt_sched_journey_ratio_cat,frequency_cat,vp_per_minute_cat,ruler_100_pct,ruler_for_vp_per_min
526012,7cc0cb1871dfd558f11a2885c145d144,1.0,all_day,51.0,0.17,4,0.17,2023-08-15,285,281,1657.13,204.0,842,706,0,0,4,4,0.51,84.0,17.0,17.0,100.0,100.0,8.12,414.28,schedule_and_vp,11.23,Bay Area 511 Muni Schedule,K INGLESIDE BUS,KBUS,KBUS INGLESIDE BUS,KBUS,downtown_local,recHD22phgJs34JHP,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,Late by 50%+ of the scheduled time,<1 trip/hour,<1 ping/minute,100,2


In [88]:
one_route_all_day = one_route.loc[one_route.time_period == "all_day"]

In [89]:
one_route_all_day.service_date.value_counts()

2023-04-12    22
2023-05-17    22
2023-06-14    22
2023-07-12    22
2023-08-15    22
2023-10-11    22
2023-11-15    22
2023-12-13    22
2024-01-17    22
2024-02-14    22
2024-03-13    22
Name: service_date, dtype: int64

#### Avg Scheduled Minutes (Done)
* How come the average_scheduled_minutes is a lot shorter than total_rt_service_minutes and total_scheduled_service_minutes?
* Kind of a boring chart? Should compare actual run time compared to service minutes?

In [122]:
import yaml

with open("readable.yml") as f:
    readable_dict = yaml.safe_load(f)

In [124]:
readable_dict['avg_scheduled_min']['title']

'Average Scheduled Minutes'

In [125]:
section2.grouped_bar_chart(
    df=one_route[one_route.time_period == "all_day"].drop_duplicates(),
    color_col="direction_id",
    y_col="avg_scheduled_service_minutes",
    offset_col="direction_id",
    title=readable_dict['avg_scheduled_min']['title'],
    subtitle=readable_dict['avg_scheduled_min']['subtitle'],
)

#### Timeliness #2 (Done)

In [91]:
timeliness = section2.timeliness_trips(one_route)

In [92]:
timeliness.sample()

Unnamed: 0,service_date,organization_name,route_combined_name,time_period,direction_id,variable,value
661,2023-10-11,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,1.0,is_late,4


In [93]:
timeliness.loc[timeliness.service_date == "2023-04-12"].sort_values(by = ['service_date','direction_id','time_period'])

Unnamed: 0,service_date,organization_name,route_combined_name,time_period,direction_id,variable,value
0,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
1,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
2,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
3,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
4,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
5,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
6,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
7,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
8,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0
9,2023-04-12,City and County of San Francisco,KBUS INGLESIDE BUS,offpeak,0.0,is_early,0


In [94]:
section2.base_facet_chart(
    timeliness.loc[timeliness.direction_id == 0].drop_duplicates(),
    "value",
    "variable",
    "time_period",
    "Breakdown of Trips by Categories for Direction 0",
    "Trips broken down by VP Only (found only in real-time data), Schedule Only (found only in schedule data), or both.",
).interactive()

#### Total Scheduled Trips (Don't Use, boring)

In [95]:
# IF we take away direction, see how many times a route is scheduled to run
total_scheduled_trip = (
    one_route.groupby(["service_date", "time_period"])
    .agg({"n_scheduled_trips": "mean"})
    .reset_index()
)

In [96]:
total_scheduled_trip.n_scheduled_trips = total_scheduled_trip.n_scheduled_trips / 2

In [97]:
total_scheduled_trip.head()

Unnamed: 0,service_date,time_period,n_scheduled_trips
0,2023-04-12,all_day,2.0
1,2023-04-12,offpeak,2.0
2,2023-05-17,all_day,2.0
3,2023-05-17,offpeak,2.0
4,2023-06-14,all_day,2.0


In [98]:
one_route.groupby(["service_date", "direction_id", "time_period"]).agg(
    {"n_scheduled_trips": "max"}
).head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n_scheduled_trips
service_date,direction_id,time_period,Unnamed: 3_level_1
2023-04-12,0.0,all_day,4
2023-04-12,0.0,offpeak,4
2023-04-12,1.0,all_day,4
2023-04-12,1.0,offpeak,4
2023-05-17,0.0,all_day,4


In [99]:
section2.grouped_bar_chart(
    df=one_route.loc[one_route.time_period != "all_day"].drop_duplicates(),
    color_col="time_period",
    y_col="n_scheduled_trips",
    offset_col="direction_id",
    title="Total Scheduled Trips",
    subtitle="How many times per day is this route is scheduled to run in one particular direction.",
)

#### Frequency (Done)
* Maybe shouldn't be a chart since there doesn't seem to be a lot of data for this across a lot of the routes?
* What does frequency mean?
* Simplify down to not take direction_id into consideration?

In [100]:
section2.frequency_chart(one_route)

#### Speed MPH (Done)
* Needs a different type of chart.

In [101]:
section2.base_facet_line(
    one_route,
    "speed_mph",
    "Average Speed",
    "The average miles per hour the bus travels by direction and time of day.",
)

#### VP per Minute (WIP)
* Heatmap too confusing & detailed

In [102]:
def add_background():
    # Sample data
    data = pd.DataFrame(
        {
            "x": range(9),
            "y": [
                0,
                0.5,
                1.5,
                2.5,
                3.5,
                2,
                1,
                3,
                4,
            ],
        }
    )

    # Background rectangle for color bands
    bg_chart = (
        alt.Chart(data)
        .mark_rect()
        .encode(
            y=alt.Y("y:Q", axis=None),
            y2="y2:Q",
            color=alt.Color("y:Q", scale=alt.Scale(range=red_green_yellow)),
            opacity=alt.value(0.2),  # Adjust opacity as needed
        )
        .transform_calculate(y2="datum.y + 1")
    )

    return bg_chart

In [126]:
section2.base_facet_with_ruler_chart(
                one_route.drop_duplicates(),
                "vp_per_minute",
                "ruler_for_vp_per_min",
                readable_dict['vp_per_min']['title'],
                readable_dict['vp_per_min']['subtitle'],
            )

In [104]:
blue_palette = ["#B9D6DF", "#2EA8CE", "#0B405B"]

#### Spatial Accuracy (Done)
* Multiple it by 100? Should this be rounded or not?

In [105]:
one_route.loc[one_route.time_period == "all_day"][['service_date','pct_in_shape']]

Unnamed: 0,service_date,pct_in_shape
525725,2023-04-12,71.0
525726,2023-04-12,71.0
525727,2023-04-12,71.0
525728,2023-04-12,71.0
525729,2023-04-12,71.0
525730,2023-04-12,71.0
525731,2023-04-12,71.0
525732,2023-04-12,71.0
525733,2023-04-12,71.0
525734,2023-04-12,71.0


In [106]:
section2.base_facet_with_ruler_chart(
    one_route.loc[one_route.time_period == "all_day"].drop_duplicates(),
    "pct_in_shape",
    "ruler_100_pct",
    "Spatial Accuracy",
    "The percentage of vehicle positions that fall within the static scheduled route shape reflects the accuracy of the spatial, realtime data.",
)

#### % RT journey with 1+/2+ vp (goal line = 100%) - use all_day, one chart shared for 1+ and 2+ (Done need subtitle)

In [107]:
def pct_vp_journey(df: pd.DataFrame, col1: str, col2: str) -> pd.DataFrame:
    to_keep = [
        "service_date",
        "organization_name",
        "direction_id",
        col1,
        col2,
        "route_combined_name",
        "time_period",
        "route_id",
        "ruler_100_pct",
    ]
    df2 = df[to_keep]

    df3 = df2.melt(
        id_vars=[
            "service_date",
            "organization_name",
            "route_combined_name",
            "direction_id",
            "time_period",
            "route_id",
            "ruler_100_pct",
        ],
        value_vars=[col1, col2],
    )

    return df3

In [108]:
pct_rt = pct_vp_journey(
    one_route, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
)

#### % schedule journey with 1+/2+ vp (goal line = 100%) - use all_day, one chart shared for 1+ and 2+ (Done need subtitle)

In [109]:
schd_rt = pct_vp_journey(
    one_route, "pct_sched_journey_atleast1_vp", "pct_sched_journey_atleast2_vp"
)

In [110]:
""" base_facet_circle(
    schd_rt.loc[schd_rt.time_period == "all_day"],
    "value",
    "ruler_100_pct",
    "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",
    "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
)"""

' base_facet_circle(\n    schd_rt.loc[schd_rt.time_period == "all_day"],\n    "value",\n    "ruler_100_pct",\n    "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",\n    "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",\n)'

#### Number of trips by sched_vp_category (/operator_schedule_rt_category)
* Why is the crosswalk missing SBMTD schedule??

In [111]:
op_sched_rt_cat = pd.read_parquet(
    f"{RT_SCHED_GCS}digest/operator_schedule_rt_category.parquet"
)

In [112]:
list(df.schedule_gtfs_dataset_key.unique())

['7cc0cb1871dfd558f11a2885c145d144', 'd8453d9a4ae1184f7e1339f7c61849df']

In [113]:
test = section2.load_operator_schedule_rt_category("de792182088eecc3d5c0bd3f1df62965")

In [114]:
op_sched_rt_cat.schedule_gtfs_dataset_key.nunique()

272

In [115]:
def basic_bar_chart(
    df: pd.DataFrame, y_col: str, color_col: str, title: str, subtitle: str
):
    chart = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(
                f"yearmonthdate(service_date):O",
                title="Service Date",
                axis=alt.Axis(format="%b %Y"),
            ),
            y=alt.Y(f"{y_col}:Q", title=labeling(y_col)),
            color=alt.Color(
                f"{color_col}:N",
                scale=alt.Scale(
                    range=cp.CALITP_SEQUENTIAL_COLORS,
                ),
            ),
            tooltip=df.columns.tolist(),
        )
    ).properties(
        title={
            "text": title,
            "subtitle": subtitle,
        },
        width=500,
        height=300,
    )
    return chart

In [116]:
"""basic_bar_chart(
    op_sched_rt_cat,
    "n_trips",
    "sched_rt_category",
    "Trips that were found in GTFS, Schedule, or Both Data Sources",
    "",
) """

'basic_bar_chart(\n    op_sched_rt_cat,\n    "n_trips",\n    "sched_rt_category",\n    "Trips that were found in GTFS, Schedule, or Both Data Sources",\n    "",\n) '

#### Text

In [117]:
def create_text_table(df: pd.DataFrame, direction_id: str):

    df = (
        df.loc[df.direction_id == direction_id].drop_duplicates().reset_index(drop=True)
    )

    if len(df) == 0:
        text_chart = create_data_unavailable_chart()
        return text_chart

    else:
        df2 = df.melt(
            id_vars=[
                "route_combined_name",
                "direction_id",
            ],
            value_vars=[
                "avg_scheduled_service_minutes",
                "avg_stop_miles",
                "n_scheduled_trips",
                "sched_rt_category",
                "peak_avg_speed",
                "peak_scheduled_trips",
                "peak_hourly_freq",
                "offpeak_avg_speed",
                "offpeak_scheduled_trips",
                "offpeak_hourly_freq",
            ],
        )
        # Create a decoy column to center all the text
        df2["Zero"] = 0

        df2.variable = df2.variable.str.replace("_", " ").str.title()
        df2 = df2.sort_values(by=["direction_id"]).reset_index(drop=True)
        df2["combo_col"] = df2.variable.astype(str) + ": " + df2.value.astype(str)
        text_chart = (
            alt.Chart(df2)
            .mark_text()
            .encode(x=alt.X("Zero:Q", axis=None), y=alt.Y("combo_col", axis=None))
        )

        text_chart = text_chart.encode(text="combo_col:N").properties(
            title=f"Route Statistics for Direction {direction_id}",
            width=500,
            height=300,
        )
        return text_chart

In [118]:
table_df = section2.route_stats(df_sched_vp_both)

#### Putting it all together

In [119]:
def filtered_route(
    df: pd.DataFrame,
) -> alt.Chart:
    """
    https://stackoverflow.com/questions/58919888/multiple-selections-in-altair
    """
   
    # Data
    # Filter for only schedule and vp
    df_sched_vp_both = df[df.sched_rt_category == "schedule_and_vp"].reset_index(
        drop=True
    )
    
    routes_list = df_sched_vp_both["route_combined_name"].unique().tolist()


    route_dropdown = alt.binding_select(
        options=routes_list,
        name="Routes",
    )

    # Column that controls the bar charts
    route_selector = alt.selection_point(
        fields=["route_combined_name"],
        bind=route_dropdown,
    )

    # Filter for only rows categorized as found in schedule and vp and all_day
    all_day = df_sched_vp_both.loc[
        df_sched_vp_both.time_period == "all_day"
    ].reset_index(drop=True)

    # Create route stats table for the text tables
    route_stats_df = route_stats(df)

    # Manipulate the df for some of the metrics
    timeliness_df = timeliness_trips(df_sched_vp_both)
    rt_journey_vp = pct_vp_journey(
        all_day, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
    )
    sched_journey_vp = pct_vp_journey(
        all_day, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
    )

    # Charts
    avg_scheduled_min = (
        section2.grouped_bar_chart(
            df=all_day.drop_duplicates(),
            color_col="direction_id",
            y_col="avg_scheduled_service_minutes",
            offset_col="direction_id",
            title="Average Scheduled Minutes",
            subtitle="The average minutes a trip is scheduled to run.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    timeliness_trips_dir_0 = (
        (
            section2.base_facet_chart(
                timeliness_df.loc[timeliness_df.direction_id == 0].drop_duplicates(),
                "value",
                "variable",
                "time_period",
                "Breakdown of Trips by Categories for Direction 0",
                "Categorizing whether a trip is early, late, or ontime. A trip is on time if it arrives 5 minutes later or earlier than scheduled.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    timeliness_trips_dir_1 = (
        (
            section2.base_facet_chart(
                timeliness_df.loc[timeliness_df.direction_id == 1].drop_duplicates(),
                "value",
                "variable",
                "time_period",
                "Breakdown of Trips by Categories for Direction 1",
                "Categorizing whether a trip is early, late, or ontime. A trip is on time if it arrives 5 minutes later or earlier than scheduled.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    frequency = (
        section2.frequency_chart(df_sched_vp_both)
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    speed = (
        section2.base_facet_line(
            df_sched_vp_both,
            "speed_mph",
            "Average Speed",
            "The average miles per hour the bus travels by direction and time of day.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    vp_per_min = (
        (
            section2.base_facet_with_ruler_chart(
                all_day.drop_duplicates(),
                "vp_per_minute",
                "ruler_for_vp_per_min",
                "Vehicle Positions per Minute",
                "Trips should have 2+ vehicle positions per minute.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    rt_vp_per_min = (
        section2.base_facet_circle(
            rt_journey_vp,
            "value",
            "ruler_100_pct",
            "Percentage of Realtime Trips with 1+ and 2+ Vehicle Positions",
            "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    sched_vp_per_min = (
        section2.base_facet_circle(
            sched_journey_vp,
            "value",
            "sched_journey_vp",
            "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",
            "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    spatial_accuracy = (
        section2.base_facet_with_ruler_chart(
            all_day.drop_duplicates(),
            "pct_in_shape",
            "ruler_100_pct",
            "Spatial Accuracy",
            "The percentage of vehicle positions that fall within the static scheduled route shape reflects the accuracy of the spatial, realtime data.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    text_dir0 = (
        (section2.create_text_table(route_stats_df, 0))
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    text_dir1 = (
        section2.create_text_table(route_stats_df, 1)
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    chart_list = [
        avg_scheduled_min,
        timeliness_trips_dir_0,
        timeliness_trips_dir_1,
        frequency,
        speed,
        vp_per_min,
        rt_vp_per_min,
        sched_vp_per_min,
        spatial_accuracy,
        text_dir0,
        text_dir1,
    ]

    chart = alt.vconcat(*chart_list).properties(
        resolve=alt.Resolve(
            scale=alt.LegendResolveMap(color=alt.ResolveMode("independent"))
        )
    )
    return chart

In [120]:
filtered_route(df_sched_vp_both)

NameError: name 'route_stats' is not defined