In [7]:
import sys

sys.path.append("../bus_service_increase")
sys.path.append("../starter_kit")  # to test out style_df function

In [20]:
%%capture
import warnings

warnings.filterwarnings("ignore")

import altair as alt
import calitp_data_analysis.magics
import pandas as pd
from shared_utils.portfolio_utils import label_visualization
from _starterkit_utils import style_df
from calitp_data_analysis import calitp_color_palette as cp
from explore_monthly_ridership_by_rtpa import sum_by_group
from IPython.display import HTML, Markdown, display
from update_vars import MONTH, PUBLIC_FILENAME, YEAR, NTD_MODES, NTD_TOS

# from monthly_ridership_by_rtpa import get_percent_change
# from shared_utils.rt_dates import MONTH_DICT

# Temp file path for testing
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/ntd/"

# alt.renderers.enable("html")
alt.data_transformers.enable("default", max_rows=None)

In [9]:
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [10]:
# parameters cell for local
rtpa = "Metropolitan Transportation Commission"  # lots of reporters
# rtpa = "Butte County Association of Governments" # 2 reporters
# rtpa = "San Joaquin Council of Governments" # 4 reporters

In [11]:
%%capture_parameters
rtpa

{"rtpa": "Metropolitan Transportation Commission"}


---

## Update `produce_ntd_monthly_ridership_by_rtpa` function

In [12]:
from calitp_data_analysis.tables import tbls
from siuba import _, collect, distinct, filter, select

In [None]:
# from annual_ridership_report.annual_ridership_module import ntd_id_to_rtpa_crosswalk

In [26]:
from _01_ntd_ridership_utils import ntd_id_to_rtpa_crosswalk, add_change_columns

In [2]:
xwalk_no_split = ntd_id_to_rtpa_crosswalk(split_scag=False)

In [None]:
# display(
#     test.head(),
#     test.info(),
#     test["rtpa_name"].value_counts()
# )

In [3]:
xwalk_w_split = ntd_id_to_rtpa_crosswalk(split_scag=True)

In [4]:
xwalk_no_split["rtpa_name"].unique() == xwalk_w_split["rtpa_name"].unique()

  xwalk_no_split["rtpa_name"].unique() == xwalk_w_split["rtpa_name"].unique()


False

In [5]:
xwalk_w_split.columns.tolist()

['name',
 'ntd_id_2022',
 'rtpa_name',
 'mpo_name',
 'key',
 'county_geography_name',
 'organization_key']

In [16]:
# updated
def produce_ntd_monthly_ridership_by_rtpa(year: int, month: int) -> pd.DataFrame:
    """
    This function works with the warehouse `dim_monthly_ntd_ridership_with_adjustments` long data format.
    Import NTD data from warehouse, filter to CA,
    merge in crosswalk, checks for unmerged rows, then creates new columns for full Mode and TOS name.

    """

    full_upt = (
        tbls.mart_ntd.dim_monthly_ridership_with_adjustments()
        >> filter(
            _.period_year.isin(
                ["2018", "2019", "2020", "2021", "2022", "2023", "2024", "2025"]
            )
        )
        >>select(
            _.ntd_id,
            _.agency,
            _.reporter_type,
            _.period_year_month,
            _.period_year,
            _.period_month,
            _.mode,
            _.tos,
            _.mode_type_of_service_status,
            _.primary_uza_name,
            _.upt
            
        )
        >> collect()
    ).rename(
        columns={
            "mode_type_of_service_status": "Status",
            "primary_uza_name": "uza_name",
        }
    )

    full_upt = full_upt[full_upt.agency.notna()].reset_index(drop=True)

    # full_upt.to_parquet(
    #     f"{GCS_FILE_PATH}ntd_monthly_ridership_{year}_{month}.parquet"
    # )

    ca = full_upt[
        (full_upt["uza_name"].str.contains(", CA")) & (full_upt.agency.notna())
    ].reset_index(drop=True)

    # use new crosswalk function
    crosswalk = ntd_id_to_rtpa_crosswalk(split_scag=True)

    min_year = 2018

    # get agencies with last report year and data after > 2018.
    last_report_year = (
        tbls.mart_ntd_funding_and_expenses.fct_service_data_and_operating_expenses_time_series_by_mode_upt()
        >> filter(
            _.year >= min_year,  # see if this changes anything
            _.last_report_year >= min_year,
            _.primary_uza_name.str.contains(", CA")
            | _.primary_uza_name.str.contains("CA-NV")
            | _.primary_uza_name.str.contains("California Non-UZA"),
        )
        >> distinct(
            "source_agency",
            #'agency_status',
            #'legacy_ntd_id',
            "last_report_year",
            #'mode',
            "ntd_id",
            #'reporter_type',
            #'reporting_module',
            #'service',
            #'uace_code',
            #'primary_uza_name',
            #'uza_population',
            #'year',
            #'upt',
        )
        >> collect()
    )

    # merge last report year to CA UPT data
    df = pd.merge(ca, last_report_year, left_on="ntd_id", right_on="ntd_id", how="inner")

    # merge crosswalk to CA last report year
    df = pd.merge(
        df,
        # Merging on too many columns can create problems
        # because csvs and dtypes aren't stable / consistent
        # for NTD ID, Legacy NTD ID, and UZA
        crosswalk[["ntd_id_2022", "rtpa_name"]],
        left_on="ntd_id",
        right_on="ntd_id_2022",
        how="left",
        indicator=True,
    )

    print(df._merge.value_counts())

    # check for unmerged rows
    if len(df[df._merge == "left_only"]) > 0:
        raise ValueError("There are unmerged rows to crosswalk")
    
    df = add_change_columns(df)
    
    df = df.assign(
        Mode_full = df["mode"].map(NTD_MODES),
        TOS_full = df["tos"].map(NTD_TOS)
    )
    
    return df

In [27]:
data = produce_ntd_monthly_ridership_by_rtpa(YEAR, MONTH)

NameError: name '_01_ntd_ridership_utils' is not defined

In [24]:
display(
    data.info(),
    data["rtpa_name"].value_counts(),
    data.describe()
)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 28864 entries, 0 to 28863
Data columns (total 21 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   ntd_id             28864 non-null  object  
 1   agency             28864 non-null  object  
 2   reporter_type      28864 non-null  object  
 3   period_year_month  28864 non-null  object  
 4   period_year        28864 non-null  int64   
 5   period_month       28864 non-null  int64   
 6   mode               28864 non-null  object  
 7   tos                28864 non-null  object  
 8   Status             28864 non-null  object  
 9   uza_name           28864 non-null  object  
 10  upt                18637 non-null  float64 
 11  source_agency      28864 non-null  object  
 12  last_report_year   28864 non-null  int64   
 13  ntd_id_2022        28864 non-null  object  
 14  rtpa_name          28864 non-null  object  
 15  _merge             28864 non-null  category
 16  prev

None

Metropolitan Transportation Commission                      6600
Los Angeles County Metropolitan Transportation Authority    6248
San Diego Association of Governments                        1936
Sacramento Area Council of Governments                      1936
Riverside County Transportation Commission                  1232
San Joaquin Council of Governments                          1144
Santa Barbara County Association of Governments              968
Orange County Transportation Authority                       968
Placer County Transportation Planning Agency                 880
Ventura County Transportation Commission                     880
Tulare County Association of Governments                     792
Stanislaus Council of Governments                            792
San Bernardino County Transportation Authority               704
Transportation Agency for Monterey County                    616
Tahoe Regional Planning Agency                               616
Santa Cruz County Regiona

Unnamed: 0,period_year,period_month,upt,last_report_year,previous_y_m_upt,change_1yr,pct_change_1yr
count,28864.0,28864.0,18637.0,28864.0,18431.0,17504.0,17489.0
mean,2021.181818,6.318182,353482.6,2022.926829,353293.2,-1746.176,-inf
std,2.124277,3.482306,1470315.0,0.435631,1470108.0,525956.3,
min,2018.0,1.0,0.0,2019.0,0.0,-15166180.0,-inf
25%,2019.0,3.0,4059.0,2023.0,4049.0,-1398.5,-0.109
50%,2021.0,6.0,22587.0,2023.0,22509.0,671.5,0.0726
75%,2023.0,9.0,141185.0,2023.0,141098.0,11587.0,0.2388
max,2025.0,12.0,24254920.0,2023.0,24254920.0,6711318.0,1.0


In [28]:
df_check = pd.read_parquet(
        f"{GCS_FILE_PATH}ca_monthly_ridership_{YEAR}_{MONTH}.parquet"
    )

In [31]:
df_check["rtpa_name"].sort_values().unique()
# wher is SLOCOG??!?!
# City of SLO was in SANDAG, adjusted city of slo RTPA in airtable 6/20/2025. should be fixed in the next report update

array(['Butte County Association of Governments',
       'El Dorado County Transportation Commission',
       'Fresno Council of Governments',
       'Imperial County Transportation Commission',
       'Kern Council of Governments',
       'Kings County Association of Governments',
       'Los Angeles County Metropolitan Transportation Authority',
       'Merced County Association of Governments',
       'Metropolitan Transportation Commission',
       'Orange County Transportation Authority',
       'Placer County Transportation Planning Agency',
       'Riverside County Transportation Commission',
       'Sacramento Area Council of Governments',
       'San Bernardino County Transportation Authority',
       'San Diego Association of Governments',
       'San Joaquin Council of Governments',
       'Santa Barbara County Association of Governments',
       'Santa Cruz County Regional Transportation Commission',
       'Shasta Regional Transportation Agency',
       'Stanislaus Council

In [34]:
ntd_rtpa_orgs = (
        tbls.mart_transit_database.dim_organizations()
        >> filter(
            _._is_current == True,
            _.ntd_id_2022.notna(),
            _.rtpa_name.notna(),
        )
        >> select(
            _.name, 
            _.ntd_id_2022, 
            _.rtpa_name, 
            _.mpo_name, 
            _.key
        )
        >> collect()
    )

    # join bridge org county geo to get agency counties
bridge_counties = (
        tbls.mart_transit_database.bridge_organizations_x_headquarters_county_geography()
        >> filter(
            _._is_current == True
        )
        >> select(
            _.county_geography_name, 
            _.organization_key
        )
        >> collect()
    )

In [54]:
ntd_rtpa_orgs[ntd_rtpa_orgs["name"].str.contains("San Luis")]

Unnamed: 0,name,ntd_id_2022,rtpa_name,mpo_name,key
70,San Luis Obispo Council of Governments,90297,San Luis Obispo Council of Governments,,6f5660575e79129ff67bc2c54a2f6c1b
149,City of San Luis Obispo,90156,San Diego Association of Governments,San Diego Association of Governments,113c74aa8cbc098bdd25c8275823f04f
216,San Luis Obispo Regional Transit Authority,90206,San Diego Association of Governments,San Diego Association of Governments,64a73df3f0231d86d20dca85c271c312


In [47]:
bridge_counties["county_geography_name"].value_counts()

Los Angeles        185
San Diego           80
Orange              68
Riverside           59
Alameda             54
San Bernardino      53
Santa Clara         46
San Francisco       43
San Mateo           38
Contra Costa        36
Fresno              33
Sacramento          32
Santa Barbara       26
Kern                26
Ventura             25
Monterey            25
Sonoma              24
San Joaquin         24
Humboldt            24
Mendocino           23
Marin               21
Stanislaus          19
San Luis Obispo     18
Tulare              17
Solano              16
Placer              14
Butte               14
Siskiyou            13
Merced              13
Imperial            13
Lake                12
Santa Cruz          12
Amador              12
Yolo                12
Inyo                11
El Dorado           11
Shasta              11
Del Norte           11
Kings               11
Lassen               9
Napa                 8
Tuolumne             8
Tehama               7
Modoc      

In [40]:
ntd_to_rtpa_crosswalk = ntd_rtpa_orgs.merge(
        bridge_counties, 
        left_on="key", 
        right_on="organization_key", 
        how="left"
    )

In [46]:
ntd_to_rtpa_crosswalk[ntd_to_rtpa_crosswalk["rtpa_name"].str.contains("San Luis")]

Unnamed: 0,name,ntd_id_2022,rtpa_name,mpo_name,key,county_geography_name,organization_key
70,San Luis Obispo Council of Governments,90297,San Luis Obispo Council of Governments,,6f5660575e79129ff67bc2c54a2f6c1b,San Luis Obispo,6f5660575e79129ff67bc2c54a2f6c1b


---

# {rtpa}
## Monthly Ridership Trends

**Download data from our public [folder](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis)** by navigating to `ntd_monthly_ridership` and selecting a file.

Transit operators/agencies that are **Urban full reporters, that submit monthly ridership data to NTD from 2018 to present**, are included in this report. Reporters that were previously Urban full reporters, but are currently not, may appear. This may result in Reporters showing zero or partial ridership data in the report. 

If a Reporter is not a monthly reporter, or has not reported data since 2018, they will not appear in the report.

Examples: 
- Reporter A is an urban full reporter from 2019-2022, then became a reduced reporter for 2023. Reporter A's ridership data will be displayed for 2019-2022 only.
- Reporter B is an urban full reporter from 2000-2017, then became a reduced reporter for 2018. Reporter B will be named in the report, but will not display ridership data.
- Reporter C was a reduced reporter form 2015-2020, then became an urban full reporter and began submitting monthly ridership data to NTD for 2021. Reporter C's ridership data will be displayed for 2021-present.

In [None]:
URL = "https://console.cloud.google.com/storage/" "browser/calitp-publish-data-analysis"

display(
    HTML(
        f"""
        <a href={URL}>
        Download the latest month of data: {PUBLIC_FILENAME}</a>
        """
    )
)

In [None]:
MIN_YEAR = 2018

# updated to filter for period_year greater than 2018
df = (
    pd.read_parquet(
        f"{GCS_FILE_PATH}ca_monthly_ridership_{YEAR}_{MONTH}.parquet",
        filters=[[("RTPA", "==", rtpa), ("period_year", ">=", MIN_YEAR)]],
    )
    .drop(
        # updated columns names to match new df
        columns=["mode", "tos", "execution_ts"]
    )
    .rename(columns={"Mode_full": "Mode", "TOS_full": "TOS"})
)

In [None]:
df.info()

In [None]:
# REMOVE

# find columns that are recent enough to plot
# MIN_YEAR = 2018

# might not need the rest of these
# not_id_cols = [c for c in df.columns if "/" in c]

# recent_years = [
#    c for c in not_id_cols if int(c.split("/")[1]) >= MIN_YEAR and
#    "pct" not in c
# ]

# upt_cols = [
#    c for c in recent_years if "change" not in c
# ]

# change_cols = [c for c in recent_years if "change" in c]

In [None]:
# REMOVE

# do i need this anymore?
# what does the data look like initially? filters/groups the DF by the `group_cols` list, keeps the initial wide data

# give this a try with current data NOPE!
# def OLD_sum_by_group(df: pd.DataFrame, group_cols: list) -> pd.DataFrame:
#    """
#    Since df is wide, use pivot_table() to sum up all
#    the columns that show UPT.
#    """
#    grouped_df = df.pivot_table(
#        index = group_cols,
#        values = "upt",
#        aggfunc="sum"
#    ).reset_index().reindex(columns = group_cols + recent_years)

#    return grouped_df


# DONT THINK I NEED THIS ANYMORE!!!
# def make_long(df: pd.DataFrame, group_cols: list, value_cols: list):
#    df_long = df[group_cols + value_cols].melt(
#        id_vars = group_cols,
#        value_vars = value_cols,
#    )

#    df_long = df_long.assign(
#        variable = df_long.variable.str.replace("change_1yr_", "")
#    )

#    return df_long

In [None]:
## MOVED FUNCTION TO SCRIPT, SO IT CAN BE USED BY save_rtpa_outputs

# function is read in with imports

# UPDATED AND TESTED, this works! gives the same results as the old old sum_by_group, make_long and assemble_long_df functions

# def sum_by_group(
#    df: pd.DataFrame,
#    group_cols: list) -> pd.DataFrame:
#    """
#    since data is now long to begin with, this replaces old sum_by_group, make_long and assemble_long_df functions.
#    """
#    from TEST_monthly_ridership_by_rtpa import get_percent_change
#    grouped_df = df.groupby(group_cols+
#                             ['period_year',
#                             'period_month',
#                             'period_year_month']
#                           ).agg({
#        "upt":"sum",
#        "previous_y_m_upt":"sum",
#        "change_1yr":"sum"
#    }
#    ).reset_index()

# get %change back
#    grouped_df = get_percent_change(grouped_df)

# decimal to whole number
#    grouped_df["pct_change_1yr"] = grouped_df["pct_change_1yr"]*100

#    return grouped_df

In [None]:
# REMOVE

# takes the sum_by_group dfs and makes them long
# unpivots the sum_by_group dfs, each row is a different year/month

# DONT THINK I NEED THIS ANYMORE!!!
# def assemble_long_df(df: pd.DataFrame, group_cols: list) -> pd.DataFrame:
#    """
#    Need df to be long to make chart.
#    Let's put raw UPT and change side-by-side.
#    """
#    df_raw = make_long(df, group_cols, upt_cols).rename(
#        columns = {"value": "upt"})
#    df_change = make_long(df, group_cols, change_cols).rename(
#        columns = {"value": "change_1yr"})

#    final = pd.merge(
#        df_raw,
#        df_change,
#        on = group_cols + ["variable"],
#        how = "left"
#    )

#    final = final.assign(
#        year = final.variable.str.split("/", expand=True)[1],
#        month = final.variable.str.split("/", expand=True)[0].str.zfill(2)
#    )

#    final = final.assign(
#        year_month = final.year + "-" + final.month
#    )

#    return final

In [None]:
# KEEPING AS IS FOR NOW, BUT MAY NEED TO REWORK


def remove_zero_upt_rows(df: pd.DataFrame) -> pd.DataFrame:
    """
    takes the by_agency_long df, filters for rows with zero UPT AND zero change_1yr.
    then removes these rows from the by_agency_long df.
    resulting df should only contain rows with change in UPT not due to (0 UPT - 0 UPT) and net zero change in UPT
    zero UPT and zero change_1yr occurs when (0 upt - 0upt = 0change), dont need to show this
    but a net zero change (100upt - 100upt = 0 change), should be shown
    """

    # df of rows with zero UPT for 2 years
    zero_UPT_2years = df[(df["upt"] == 0) & (df["change_1yr"] == 0)]

    merge = df.merge(zero_UPT_2years, how="left", indicator=True)
    no_zero_UPT_rows = (
        merge[merge["_merge"] == "left_only"]
        .drop(columns=["_merge"])
        .dropna(subset="change_1yr")
    )

    return no_zero_UPT_rows

In [None]:
def group_by_agency(df):
    """
    Take in the 'by_ageny_long' df and aggregatese by rtpa, and calculates upt % of total.
    To be used in pie chart
    """
    initial_agg = df.groupby("agency").agg(total_upt=("upt", "sum")).reset_index()

    # % total columns
    initial_agg["pct_of_total_upt"] = (
        initial_agg["total_upt"] / initial_agg["total_upt"].sum()
    ) * 100

    # cleaning data types and rounding
    initial_agg["total_upt"] = initial_agg["total_upt"].astype("int64")
    initial_agg["pct_of_total_upt"] = initial_agg["pct_of_total_upt"].round(decimals=2)
    cleaned_agg = initial_agg.sort_values(by="total_upt", ascending=False)

    return cleaned_agg

In [None]:
# NEW

agency_cols = ["ntd_id", "agency", "RTPA"]
mode_cols = ["Mode", "RTPA"]
tos_cols = ["TOS", "RTPA"]

# sum_by_group still works! now that it is imported!
by_agency_long = sum_by_group(df, agency_cols)
by_mode_long = sum_by_group(df, mode_cols)
by_tos_long = sum_by_group(df, tos_cols)

# zero upt still works!
by_agency_long_no_zero_upt = remove_zero_upt_rows(by_agency_long)

# total UPT check

total_upt = by_agency_long["upt"].sum()
agency_count = by_agency_long["agency"].nunique()


agency_agg_yr = group_by_agency(by_agency_long)

In [None]:
# REMOVE

# I STILL NEED FILTERED df BY AGENCY, MODE AND TOS

# by_agency_long = assemble_long_df(by_agency, agency_cols)
# by_mode_long = assemble_long_df(by_mode, mode_cols)
# by_tos_long = assemble_long_df(by_tos, tos_cols)

# by_agency_long_no_zero_upt = remove_zero_upt_rows(by_agency_long)

In [None]:
LABELING_DICT = {
    "upt": "Unlinked Passenger Trips",
    "change_1yr": "Change in Unlinked Passenger Trips from Prior Year",
    "TOS": "Type of Service",
    "year_month": "Date",
}


def labeling(word: str) -> str:
    return label_visualization(word, LABELING_DICT)


WIDTH = 300
HEIGHT = 150

In [None]:
def make_line_chart(
    df: pd.DataFrame,
    y_col: str,
    color_col: str,
) -> alt.Chart:
    df = df[df[y_col] > 0].dropna(subset=y_col)

    x_label = [
        i
        for i in df.period_year_month.unique()
        if any(substring in i for substring in ["-01", "-06"])
    ]
    chart = (
        (
            alt.Chart(df)
            .mark_line()
            .encode(
                x=alt.X(
                    "period_year_month:O", axis=alt.Axis(values=x_label), title="Date"
                ),
                y=alt.Y(y_col, title=labeling(y_col)),
                color=alt.Color(
                    color_col,
                    title="",
                    scale=alt.Scale(
                        range=cp.CALITP_CATEGORY_BRIGHT_COLORS
                        + cp.CALITP_CATEGORY_BOLD_COLORS
                    ),
                ),
                tooltip=["period_year_month", y_col, color_col, "RTPA"],
            )
            .properties(width=WIDTH, height=HEIGHT)
            .facet(color_col, columns=2, title="")
            .resolve_scale(y="independent")
        )
        .properties(title=f"{labeling(y_col)} by {labeling(color_col)}")
        .interactive()
    )

    return chart

In [None]:
def make_bar_chart(
    df: pd.DataFrame,
    y_col: str,
    color_col: str,
) -> alt.Chart:

    def short_label(word):
        shorten_dict = {
            "change_1yr": "Change",
            "pct_change_1yr": "Change",
        }
        return shorten_dict[word]

    # For change column, we are missing everything prior to 2023
    # df = df.dropna(subset = y_col)

    # need flag for y_col >,<, 0, missing?
    # count function to how many agencies fall in those categories, then look at those agencies
    # present table

    x_label = [
        i
        for i in df.period_year_month.unique()
        if any(substring in i for substring in ["-01", "-03", "-06", "-09"])
    ]

    chart = (
        (
            alt.Chart(df)
            .mark_bar()
            .encode(
                x=alt.X(
                    "period_year_month:O", axis=alt.Axis(values=x_label), title="Date"
                ),
                y=alt.Y(y_col, title=short_label(y_col)),
                color=alt.Color(
                    color_col,
                    title="",
                    scale=alt.Scale(
                        range=cp.CALITP_CATEGORY_BRIGHT_COLORS
                        + cp.CALITP_CATEGORY_BOLD_COLORS
                    ),
                ),
                tooltip=["period_year_month", y_col, color_col, "RTPA"],
            )
            .properties(width=WIDTH, height=HEIGHT)
            .facet(color_col, columns=2, title="")
            .resolve_scale(x="shared", y="independent")
        )
        .properties(title=f"{labeling(y_col)} by {labeling(color_col)}")
        .interactive()
    )

    return chart

In [None]:
### initial pie code
def make_pie_chart(df, col, color_col):

    pie = (
        alt.Chart(df)
        .mark_arc(radius=150)
        .encode(
            theta=col,
            color=color_col,
            tooltip=["agency", "total_upt", "pct_of_total_upt"],
        )
        .properties(
            title=f"Total Unlinked Passenger Trips per agency in RTPA since {MIN_YEAR}",
        )
    )

    return pie

In [None]:
## moved to prod notebook 1/15/2025
# simple bar chart for total agencies and UPT
def total_upt_chart(df: pd.DataFrame, x_col: str, y_col: str, tool_tip: list):
    bar_chart = (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X(x_col).sort("-y"),
            y=alt.Y(y_col),
            tooltip=tool_tip,
            color=alt.Color(
                x_col,
                title="",
                scale=alt.Scale(
                    range=cp.CALITP_CATEGORY_BRIGHT_COLORS
                    + cp.CALITP_CATEGORY_BOLD_COLORS
                ),
            ),
        )
        .properties(
            title=f"Total Unlinked Passenger Trips per agency in RTPA since {MIN_YEAR}",
            width=WIDTH,
            height=HEIGHT,
        )
        .resolve_scale(y="independent")
        .interactive()
    )

    return bar_chart

### Totals

In [None]:
Markdown(
    f"""
Within {rtpa}:
- Number of Agencies/Operators: <b>{agency_count}</b>.
- Total Unlinked Passenger Trips since {MIN_YEAR}: <b>{total_upt:,}</b>.
- Individual agency/operator ridership breakdown:
"""
)

In [None]:
display(agency_agg_yr.reset_index(drop=True))

In [None]:
df_html = agency_agg_yr.reset_index(drop=True).to_html()

print(df_html)

In [None]:
df_markdown = agency_agg_yr.reset_index(drop=True).to_markdown()

print(df_markdown)

In [None]:
tooltip_list = ["agency", "total_upt", "pct_of_total_upt"]

total_upt_chart(agency_agg_yr, x_col="agency", y_col="total_upt", tool_tip=tooltip_list)

In [None]:
make_pie_chart(agency_agg_yr, col="total_upt", color_col="agency")

### Transit Agency

In [None]:
make_line_chart(by_agency_long, y_col="upt", color_col="agency")

Change in Unlinked Passenger Trips from the prior year. For example, July 2023's change would be the change in July 2023's reported values against July 2022's reported values.

In [None]:
# TEST OF BAR CHART WITH % CHANGE 1 YEAR
make_bar_chart(
    by_agency_long_no_zero_upt[by_agency_long_no_zero_upt["period_year"] >= 2018],
    y_col="pct_change_1yr",
    color_col="agency",
)

In [None]:
make_bar_chart(
    by_agency_long_no_zero_upt[by_agency_long_no_zero_upt["period_year"] >= 2024],
    y_col="change_1yr",
    color_col="agency",
)

In [None]:
make_bar_chart(by_agency_long_no_zero_upt, y_col="change_1yr", color_col="agency")

### Transit Mode

In [None]:
make_line_chart(by_mode_long, y_col="upt", color_col="Mode")

In [None]:
make_bar_chart(
    by_mode_long[by_mode_long["period_year"] >= 2024],
    y_col="change_1yr",
    color_col="Mode",
)

### Type of Service

In [None]:
make_line_chart(by_tos_long, y_col="upt", color_col="TOS")

In [None]:
make_bar_chart(
    by_tos_long[by_tos_long["period_year"] >= 2024], y_col="change_1yr", color_col="TOS"
)