## Round 1 
* Updating existing charts made by Tiffany. https://gtfs-digest--cal-itp-data-analyses.netlify.app/
* cd rt_segment_speeds && pip install altair_transform && pip install -r requirements.txt && cd ../_shared_utils && make setup_env

Links
* https://github.com/cal-itp/data-analyses/issues/1059
* https://docs.google.com/document/d/1I1WiqlmU06W6iLCi7cZQrOCLILkrEfABEkcU0Jys7f0/edit
* https://route-speeds--cal-itp-data-analyses.netlify.app/name_bay-area-511-muni-schedule/0__report__name_bay-area-511-muni-schedule
* https://posit-dev.github.io/great-tables/get-started/nanoplots.html
* https://docs.pola.rs/py-polars/html/reference/api/polars.from_pandas.html
* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/_rt_scheduled_utils.py
* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/_threshold_utils.py

In [1]:
import _report_utils
import _section2_utils as section2
import altair as alt
import calitp_data_analysis.magics
import geopandas as gpd
import great_tables as gt
import pandas as pd
from calitp_data_analysis import calitp_color_palette as cp
from great_tables import md
from IPython.display import HTML, Markdown, display
from segment_speed_utils.project_vars import RT_SCHED_GCS, SCHED_GCS
from shared_utils import catalog_utils, rt_dates, rt_utils

alt.renderers.enable("html")
alt.data_transformers.enable("default", max_rows=None)
from typing import List, Union

from altair_transform.extract import extract_transform
from altair_transform.transform import visit
from altair_transform.utils import to_dataframe

In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

In [3]:
GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data")

In [4]:
import yaml

with open("readable.yml") as f:
    readable_dict = yaml.safe_load(f)

In [5]:
org_name = "City and County of San Francisco"

In [6]:
# Set drop down menu to be on the upper right
display(
    HTML(
        """
<style>
form.vega-bindings {
  position: absolute;
  right: 0px;
  top: 0px;
}
</style>
"""
    )
)

### Original File 

In [7]:
schd_vp_url = f"{GTFS_DATA_DICT.digest_tables.dir}{GTFS_DATA_DICT.digest_tables.route_schedule_vp}.parquet"

In [8]:
og = pd.read_parquet(schd_vp_url)

In [9]:
og.columns

Index(['schedule_gtfs_dataset_key', 'direction_id', 'time_period',
       'avg_scheduled_service_minutes', 'avg_stop_miles', 'n_scheduled_trips',
       'frequency', 'service_date', 'minutes_atleast1_vp',
       'minutes_atleast2_vp', 'total_rt_service_minutes',
       'total_scheduled_service_minutes', 'total_vp', 'vp_in_shape',
       'is_early', 'is_ontime', 'is_late', 'n_vp_trips', 'vp_per_minute',
       'pct_in_shape', 'pct_rt_journey_atleast1_vp',
       'pct_rt_journey_atleast2_vp', 'pct_sched_journey_atleast1_vp',
       'pct_sched_journey_atleast2_vp', 'rt_sched_journey_ratio',
       'avg_rt_service_minutes', 'sched_rt_category', 'speed_mph', 'name',
       'route_long_name', 'route_short_name', 'route_combined_name',
       'route_id', 'typology', 'schedule_source_record_id', 'base64_url',
       'organization_source_record_id', 'organization_name',
       'caltrans_district'],
      dtype='object')

### Checkout Duplicates

In [10]:
df = section2.load_schedule_vp_metrics(org_name)

In [11]:
len(df)

4654

In [12]:
df.head(2)

Unnamed: 0,schedule_gtfs_dataset_key,Direction,Period,Average Scheduled Service (trip minutes),Average Stop Distance (miles),# scheduled trips,Trips per Hour,Date,# Minutes with 1+ VP per Minute,# Minutes with 2+ VP per Minute,Aggregate Actual Service Minutes,Aggregate Scheduled Service Minutes (all trips),# VP,# VP within Scheduled Shape,# Early Arrival Trips,# On-Time Trips,# Late Trips,# Trips with VP,Average VP per Minute,% VP within Scheduled Shape,% Actual Trip Minutes with 1+ VP per Minute,% Actual Trip Minutes with 2+ VP per Minute,% Scheduled Trip Minutes with 1+ VP per Minute,% Scheduled Trip Minutes with 2+ VP per Minute,Realtime versus Scheduled Service Ratio,Average Actual Service (Trip Minutes),GTFS Availability,Speed (MPH),Transit Operator,route_long_name,route_short_name,Route,Route ID,Route typology,schedule_source_record_id,Base64 Encoded Feed URL,Organization ID,Organization,District,rt_sched_journey_ratio_cat,vp_per_minute_cat,spatial_accuracy_cat,ruler_100_pct,ruler_for_vp_per_min,frequency_in_minutes
0,7cc0cb1871dfd558f11a2885c145d144,0.0,all_day,41.33,0.12,151,6.29,2023-04-12,7816,7708,12084.08,6194.0,23106,21485,4,28,118,150,1.91,93.0,65.0,64.0,100.0,100.0,1.95,80.56,schedule_and_vp,6.91,Bay Area 511 Muni Schedule,CALIFORNIA,1,1 CALIFORNIA,1,downtown_local,recHD22phgJs34JHP,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,Late by 50%+ of the scheduled time,<2 pings/minute,<50% spatial accuracy,100,2,9.54
1,7cc0cb1871dfd558f11a2885c145d144,0.0,all_day,41.33,0.11,151,6.29,2023-05-17,8015,7898,12137.89,6194.0,23681,21951,0,27,123,150,1.95,93.0,66.0,65.0,100.0,100.0,1.96,80.92,schedule_and_vp,6.79,Bay Area 511 Muni Schedule,CALIFORNIA,1,1 CALIFORNIA,1,downtown_local,recHD22phgJs34JHP,aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TRg==,rechaapWbeffO33OX,City and County of San Francisco,04 - Oakland,Late by 50%+ of the scheduled time,<2 pings/minute,<50% spatial accuracy,100,2,9.54


### One Route

In [13]:
one_route = df.loc[df["Route"] == "49 VAN NESS-MISSION"]

In [14]:
one_route.shape

(78, 45)

In [15]:
one_route_all_day = one_route.loc[one_route["Period"] == "all_day"]

In [16]:
one_route_all_day["Route"].values[0]

'49 VAN NESS-MISSION'

In [17]:
one_route_all_day.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 26 entries, 2034 to 4467
Data columns (total 45 columns):
 #   Column                                           Non-Null Count  Dtype         
---  ------                                           --------------  -----         
 0   schedule_gtfs_dataset_key                        26 non-null     object        
 1   Direction                                        26 non-null     float64       
 2   Period                                           26 non-null     object        
 3   Average Scheduled Service (trip minutes)         26 non-null     float64       
 4   Average Stop Distance (miles)                    26 non-null     float64       
 5   # scheduled trips                                26 non-null     int64         
 6   Trips per Hour                                   26 non-null     float64       
 7   Date                                             26 non-null     datetime64[ns]
 8   # Minutes with 1+ VP per Minute      

#### Explanation Charts

In [18]:
def divider_chart(df: pd.DataFrame, text):
    df = df[["Route"]].drop_duplicates()
    route = df["Route"].values[0].title()
    # Create a text chart using Altair
    chart = (
        alt.Chart(df)
        .mark_text(
            align="center",
            baseline="middle",
            fontSize=12,
            text=f"{text} {route}.",
        )
        .properties(width=500, height=100)
    )

    return chart

In [19]:
divider_chart(df, "The charts below describe the quality of riding route")

#### Avg Scheduled Minutes (Done)
* How come the average_scheduled_minutes is a lot shorter than total_rt_service_minutes and total_scheduled_service_minutes?
* Kind of a boring chart? Should compare actual run time compared to service minutes?

In [23]:
def grouped_bar_chart(
    df: pd.DataFrame,
    color_col: str,
    y_col: str,
    offset_col: str,
    title: str,
    subtitle: str,
):
    tooltip_cols = [
        "Direction",
        "Period",
        "Route",
        "Organization",
        "Date",
        color_col,
        y_col,
    ]

    if len(df) == 0:
        text_chart = section2.create_data_unavailable_chart()
        return text_chart
    else:
        df = section2.clean_data_charts(df, y_col)
        chart = (
            alt.Chart(df)
            .mark_bar(size=10)
            .encode(
                x=alt.X(
                    "yearmonthdate(Date):O",
                    title=["Grouped by Direction ID", "Date"],
                    axis=alt.Axis(labelAngle=-45, format="%b %Y"),
                ),
                y=alt.Y(f"{y_col}:Q", title=_report_utils.labeling(y_col)),
                xOffset=alt.X(
                    f"{offset_col}:N", title=_report_utils.labeling(offset_col)
                ),
                color=alt.Color(
                    f"{color_col}:N",
                    title=_report_utils.labeling(color_col),
                    scale=alt.Scale(
                        range=_report_utils.red_green_yellow,
                    ),
                ),
                tooltip=tooltip_cols,
            )
        )
        chart = (chart).properties(
            title={
                "text": [title],
                "subtitle": [subtitle],
            },
            width=500,
            height=300,
        )

        return chart

In [24]:
grouped_bar_chart(
    df=one_route_all_day,
    color_col="Direction",
    y_col="Average Scheduled Service (trip minutes)",
    offset_col="Direction",
    title=readable_dict["avg_scheduled_min_graph"]["title"],
    subtitle=readable_dict["avg_scheduled_min_graph"]["subtitle"],
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Period"] = df["Period"].str.replace("_", " ").str.title()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[y_col] = df[y_col].fillna(0).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{y_col}_str"] = df[y_col].astype(str)


#### Timeliness #2 (Done)

In [39]:
def timeliness_trips(df: pd.DataFrame):
    to_keep = [
        "Date",
        "Organization",
        "Direction",
        "Period",
        "Route",
        "# Early Arrival Trips",
        "# On-Time Trips",
        "# Late Trips",
        "# Trips with VP",
    ]
    df = df.loc[df["Period"] != "all_day"]
    df2 = df[to_keep]

    melted_df = df2.melt(
        id_vars=[
            "Date",
            "Organization",
            "Route",
            "Period",
            "Direction",
        ],
        value_vars=[
            "# Early Arrival Trips",
            "# On-Time Trips",
            "# Late Trips",
        ],
    )
    return melted_df

In [40]:
timeliness = timeliness_trips(one_route)

In [41]:
timeliness["Period"].value_counts()

offpeak    78
peak       78
Name: Period, dtype: int64

In [42]:
def base_facet_chart(
    df: pd.DataFrame,
    y_col: str,
    color_col: str,
    facet_col: str,
    title: str,
    subtitle: str,
):
    tooltip_cols = [
        "Direction",
        "Period",
        "Route",
        "Organization",
        "Date",
        y_col,
        color_col,
    ]

    if len(df) == 0:
        text_chart = section2.create_data_unavailable_chart()
        return text_chart
    else:
        if "%" in y_col:
            max_y = 100
        elif "Minute" in y_col:
            max_y = round(df[y_col].max())
        else:
            max_y = round(df[y_col].max(), -1) + 5
        df = section2.clean_data_charts(df,y_col)
        chart = (
            (
                alt.Chart(df)
                .mark_bar(size=15, clip=True)
                .encode(
                    x=alt.X(
                        "yearmonthdate(Date):O",
                        title=["Date"],
                        axis=alt.Axis(labelAngle=-45, format="%b %Y"),
                    ),
                    y=alt.Y(
                        f"{y_col}:Q",
                        title=_report_utils.labeling(y_col),
                        scale=alt.Scale(domain=[0, max_y]),
                    ),
                    color=alt.Color(
                        f"{color_col}:N",
                        title=_report_utils.labeling(color_col),
                        scale=alt.Scale(range=_report_utils.red_green_yellow),
                    ),
                    tooltip=tooltip_cols,
                )
            )
            .facet(
                column=alt.Column(
                    f"{facet_col}:N",
                )
            )
            .properties(
                title={
                    "text": title,
                    "subtitle": subtitle,
                }
            )
        )
        return chart

In [43]:
base_facet_chart(
    timeliness.loc[timeliness["Direction"] == 1],
    "value",
    "variable",
    "Period",
    readable_dict["timeliness_trips_dir_1_graph"]["title"],
    readable_dict["timeliness_trips_dir_0_graph"]["subtitle"]
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["Period"] = df["Period"].str.replace("_", " ").str.title()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[y_col] = df[y_col].fillna(0).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{y_col}_str"] = df[y_col].astype(str)


#### Frequency (Done)
* Maybe shouldn't be a chart since there doesn't seem to be a lot of data for this across a lot of the routes?
* What does frequency mean?
* Simplify down to not take direction_id into consideration?

In [None]:
def frequency_chart(df: pd.DataFrame):
    if len(df) == 0:
        text_chart = create_data_unavailable_chart()
        return text_chart

    else:
        chart = (alt.Chart(df, width=180, height=alt.Step(10)).mark_bar().encode(
        alt.Y(
            "yearmonthdate(Date):O",
            title="Date",
            axis=alt.Axis(format="%b %Y"),
        ),
        alt.X("frequency_in_minutes:Q", title=_report_utils.labeling("frequency_in_minutes"), axis=None),
        alt.Color("frequency_in_minutes:Q", scale=alt.Scale(range=_report_utils.green_red_yellow)).title(
            _report_utils.labeling("frequency_in_minutes")
        ),
        alt.Row("Period:N").title(_report_utils.labeling("Period")).header(labelAngle=0),
        alt.Column("Direction:N").title(_report_utils.labeling("Direction")),
        tooltip=["Date", "Frequency in Minutes", "Period", "Direction"]
    )
                )
        chart = chart.properties(title="Frequency of Trips")
        return chart

#### Speed MPH (Done)
* Needs a different type of chart.

In [None]:
section3.base_facet_line(
    one_route,
    "speed_mph",
    "Average Speed",
    "The average miles per hour the bus travels by direction and time of day.",
)

#### VP per Minute (WIP)
* Heatmap too confusing & detailed

In [None]:
def add_background():
    # Sample data
    data = pd.DataFrame(
        {
            "x": range(9),
            "y": [
                0,
                0.5,
                1.5,
                2.5,
                3.5,
                2,
                1,
                3,
                4,
            ],
        }
    )

    # Background rectangle for color bands
    bg_chart = (
        alt.Chart(data)
        .mark_rect()
        .encode(
            y=alt.Y("y:Q", axis=None),
            y2="y2:Q",
            color=alt.Color("y:Q", scale=alt.Scale(range=red_green_yellow)),
            opacity=alt.value(0.2),  # Adjust opacity as needed
        )
        .transform_calculate(y2="datum.y + 1")
    )

    return bg_chart

In [None]:
section3.base_facet_with_ruler_chart(
    one_route.drop_duplicates(),
    "vp_per_minute",
    "ruler_for_vp_per_min",
    readable_dict["vp_per_min"]["title"],
    readable_dict["vp_per_min"]["subtitle"],
)

#### Spatial Accuracy (Done)
* Multiple it by 100? Should this be rounded or not?

In [None]:
section3.base_facet_with_ruler_chart(
    one_route.loc[one_route.time_period == "all_day"].drop_duplicates(),
    "pct_in_shape",
    "ruler_100_pct",
    "Spatial Accuracy",
    "The percentage of vehicle positions that fall within the static scheduled route shape reflects the accuracy of the spatial, realtime data.",
)

#### % RT journey with 1+/2+ vp (goal line = 100%) - use all_day, one chart shared for 1+ and 2+ (Done need subtitle)

In [None]:
def pct_vp_journey(df: pd.DataFrame, col1: str, col2: str) -> pd.DataFrame:
    to_keep = [
        "service_date",
        "organization_name",
        "direction_id",
        col1,
        col2,
        "route_combined_name",
        "time_period",
        "route_id",
        "ruler_100_pct",
    ]
    df2 = df[to_keep]

    df3 = df2.melt(
        id_vars=[
            "service_date",
            "organization_name",
            "route_combined_name",
            "direction_id",
            "time_period",
            "route_id",
            "ruler_100_pct",
        ],
        value_vars=[col1, col2],
    )

    return df3

In [None]:
pct_rt = pct_vp_journey(
    one_route, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
)

#### % schedule journey with 1+/2+ vp (goal line = 100%) - use all_day, one chart shared for 1+ and 2+ (Done need subtitle)

In [None]:
schd_rt = pct_vp_journey(
    one_route, "pct_sched_journey_atleast1_vp", "pct_sched_journey_atleast2_vp"
)

In [None]:
""" base_facet_circle(
    schd_rt.loc[schd_rt.time_period == "all_day"],
    "value",
    "ruler_100_pct",
    "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",
    "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
)"""

#### Number of trips by sched_vp_category (/operator_schedule_rt_category)
* Why is the crosswalk missing SBMTD schedule??

In [None]:
op_sched_rt_cat = pd.read_parquet(
    f"{RT_SCHED_GCS}digest/operator_schedule_rt_category.parquet"
)

In [None]:
list(df.schedule_gtfs_dataset_key.unique())

In [None]:
test = section3.load_operator_schedule_rt_category("de792182088eecc3d5c0bd3f1df62965")

In [None]:
op_sched_rt_cat.schedule_gtfs_dataset_key.nunique()

In [None]:
def basic_bar_chart(
    df: pd.DataFrame, y_col: str, color_col: str, title: str, subtitle: str
):
    chart = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(
                f"yearmonthdate(service_date):O",
                title="Service Date",
                axis=alt.Axis(format="%b %Y"),
            ),
            y=alt.Y(f"{y_col}:Q", title=labeling(y_col)),
            color=alt.Color(
                f"{color_col}:N",
                scale=alt.Scale(
                    range=cp.CALITP_SEQUENTIAL_COLORS,
                ),
            ),
            tooltip=df.columns.tolist(),
        )
    ).properties(
        title={
            "text": title,
            "subtitle": subtitle,
        },
        width=500,
        height=300,
    )
    return chart

In [None]:
"""basic_bar_chart(
    op_sched_rt_cat,
    "n_trips",
    "sched_rt_category",
    "Trips that were found in GTFS, Schedule, or Both Data Sources",
    "",
) """

#### Text

In [None]:
table_df = section3.route_stats(one_route)

In [None]:
section3.create_text_table(table_df, 0)

In [None]:
section3.create_text_table(table_df, 1)

In [None]:
routes_list = df["route_combined_name"].drop_duplicates().tolist()

dropdown = alt.binding_select(options=routes_list, name="Select Route")

In [None]:
routes_list[0]

In [None]:
xcol_param = alt.param(value="AMRTS Red Route", bind=dropdown)

In [None]:
section3.base_facet_with_ruler_chart(
    df.loc[df.time_period == "all_day"].drop_duplicates(),
    "pct_in_shape",
    "ruler_100_pct",
    "Spatial Accuracy",
    "The percentage of vehicle positions that fall within the static scheduled route shape reflects the accuracy of the spatial, realtime data.",
).transform_calculate(x=f"datum[{xcol_param.name}]").add_params(xcol_param)

#### Putting it all together

In [None]:
def filtered_route(
    df: pd.DataFrame,
) -> alt.Chart:
    """
    https://stackoverflow.com/questions/58919888/multiple-selections-in-altair
    """
    # Create dropdown
    routes_list = df["Route"].unique().tolist()

    route_dropdown = alt.binding_select(
        options=routes_list,
        name="Routes",
    )
    # Column that controls the bar charts
    route_selector = alt.selection_point(
        fields=["Route"],
        bind=route_dropdown,
    )

    # Filter for only rows categorized as found in schedule and vp and all_day
    all_day = df.loc[df["Period"] == "all_day"].reset_index(drop=True)

    # Create route stats table for the text tables
    route_stats_df = section3.route_stats(df)

    # Manipulate the df for some of the metrics
    timeliness_df = section3.timeliness_trips(df)
    rt_journey_vp = section3.pct_vp_journey(
        all_day, "pct_rt_journey_atleast1_vp", "pct_rt_journey_atleast2_vp"
    )
    sched_journey_vp = section3.pct_vp_journey(
        all_day, "pct_sched_journey_atleast1_vp", "pct_sched_journey_atleast2_vp"
    )

    # Charts
    rider_quality = divider_chart(
        df, "The charts below describe the quality of riding route"
    )

    avg_scheduled_min = (
        section3.grouped_bar_chart(
            df=all_day,
            color_col="direction_id",
            y_col="avg_scheduled_service_minutes",
            offset_col="direction_id",
            title="Average Scheduled Minutes",
            subtitle="The average minutes a trip is scheduled to run.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    timeliness_trips_dir_0 = (
        (
            section3.base_facet_chart(
                timeliness_df.loc[timeliness_df.direction_id == 0],
                "value",
                "variable",
                "time_period",
                "Breakdown of Trips by Categories for Direction 0",
                "Categorizing whether a trip is early, late, or ontime. A trip is on time if it arrives 5 minutes later or earlier than scheduled.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    timeliness_trips_dir_1 = (
        (
            section3.base_facet_chart(
                timeliness_df.loc[timeliness_df.direction_id == 1],
                "value",
                "variable",
                "time_period",
                "Breakdown of Trips by Categories for Direction 1",
                "Categorizing whether a trip is early, late, or ontime. A trip is on time if it arrives 5 minutes later or earlier than scheduled.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    frequency = (
        section3.frequency_chart(df)
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    speed = (
        section3.base_facet_line(
            df,
            "speed_mph",
            "Average Speed",
            "The average miles per hour the bus travels by direction and time of day.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    data_quality = divider_chart(
        df, "The charts below describe the quality of the data collected for route"
    )
    vp_per_min = (
        (
            section3.base_facet_with_ruler_chart(
                all_day,
                "vp_per_minute",
                "ruler_for_vp_per_min",
                "Vehicle Positions per Minute",
                "Trips should have 2+ vehicle positions per minute.",
            )
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    rt_vp_per_min = (
        section3.base_facet_circle(
            rt_journey_vp,
            "value",
            "ruler_100_pct",
            "Percentage of Realtime Trips with 1+ and 2+ Vehicle Positions",
            "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    sched_vp_per_min = (
        section3.base_facet_circle(
            sched_journey_vp,
            "value",
            "ruler_100_pct",
            "Percentage of Scheduled Trips with 1+ and 2+ Vehicle Positions",
            "The goal is for almost 100% of trips to have 2 or more Vehicle Positions per minute.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    spatial_accuracy = (
        section3.base_facet_with_ruler_chart(
            all_day,
            "pct_in_shape",
            "ruler_100_pct",
            "Spatial Accuracy",
            "The percentage of vehicle positions that fall within the static scheduled route shape reflects the accuracy of the spatial, realtime data.",
        )
        .add_params(route_selector)
        .transform_filter(route_selector)
    )

    text_dir0 = (
        (section3.create_text_table(route_stats_df, 0))
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    text_dir1 = (
        section3.create_text_table(route_stats_df, 1)
        .add_params(route_selector)
        .transform_filter(route_selector)
    )
    chart_list = [
        avg_scheduled_min,
        timeliness_trips_dir_0,
        timeliness_trips_dir_1,
        frequency,
        speed,
        vp_per_min,
        rt_vp_per_min,
        sched_vp_per_min,
        spatial_accuracy,
        text_dir0,
        text_dir1,
    ]

    chart = alt.vconcat(*chart_list).properties(
        resolve=alt.Resolve(
            scale=alt.LegendResolveMap(color=alt.ResolveMode("independent"))
        )
    )
    return chart

In [None]:
filtered_route(df)