In [1]:
%%capture
import sys
sys.path.append("../../bus_service_increase")
sys.path.append("../")
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp_data_analysis.magics
import pandas as pd
import annual_ridership_module

from IPython.display import display, HTML, Markdown

from bus_service_utils import chart_utils
from calitp_data_analysis import calitp_color_palette as cp
from update_vars import GCS_FILE_PATH, PUBLIC_FILENAME, YEAR, MONTH

#from shared_utils.rt_dates import MONTH_DICT

#alt.renderers.enable("html")
alt.data_transformers.enable('default', max_rows=None)

WIDTH = 300
HEIGHT = 150

In [2]:
# parameters cell for local
rtpa = "Metropolitan Transportation Commission"

In [3]:
%%capture_parameters
rtpa

{"rtpa": "Metropolitan Transportation Commission"}


# {rtpa}
## Annual Ridership Trends

**Download data from our public [folder](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis)** by navigating to `ntd_annual_ridership` and selecting a file.

Transit operators/agencies that aubmit annual reports to NTD are included in this report. Reporters that were previously Urban full reporters, but are currently not, may appear. This may result in Reporters showing zero or partial ridership data in the report. 

If a Reporter is not a monthly reporter, or has not reported data since 2018, they will not appear in the report.

Examples: 
- **Reporter A** is an urban full reporter from 2019-2022, then became a reduced reporter for 2023. Reporter A's ridership data will be displayed for 2019-2022 only.
- **Reporter B** is an urban full reporter from 2000-2017, then became a reduced reporter for 2018. Reporter B will be named in the report, but will not display ridership data.
- **Reporter C** was a reduced reporter form 2015-2020, then became an urban full reporter and began submitting monthly ridership data to NTD for 2021. Reporter C's ridership data will be displayed for 2021-present. 

In [4]:
URL = ("https://console.cloud.google.com/storage/"
       "browser/calitp-publish-data-analysis"
      )

display(
    HTML(
        f"""
        <a href={URL}>
        Download the latest month of data: {PUBLIC_FILENAME}</a>
        """
    )
)

In [5]:
#read in data
df = pd.read_parquet(
    f"{GCS_FILE_PATH}annual_ridership_report_data.parquet"
)

In [6]:
df.columns

Index(['report_year', 'ntd_id', 'agency_x', 'reporter_type_x',
       'organization_type', 'city_x', 'state', 'primary_uza_name', 'vrh',
       'vrm', 'upt', 'agency_y', 'reporter_type_y', 'city_y', 'RTPA', '_merge',
       'previous_y_upt', 'change_1yr', 'pct_change_1yr'],
      dtype='object')

In [7]:
def sum_by_group(
    df: pd.DataFrame,
    group_cols: list) -> pd.DataFrame:
    """
    since data is now long to begin with, this replaces old sum_by_group, make_long and assemble_long_df functions.
    
    """
    grouped_df = df.groupby(group_cols+
                            ["report_year"]
                           ).agg({
        "upt":"sum",
        "vrm":"sum",
        "vrh":"sum",
        "previous_y_upt":"sum",
        "change_1yr":"sum"
    }
    ).reset_index()
    
    #get %change back
    grouped_df = annual_ridership_module.get_percent_change(grouped_df)
    
    #decimal to whole number
    grouped_df["pct_change_1yr"] = grouped_df["pct_change_1yr"]*100
    
    return grouped_df

In [8]:
def group_by_agency(df):
    """
    Take in the 'by_ageny_long' df and aggregatese by rtpa, and calculates upt % of total. 
    To be used in pie chart
    """
    initial_agg = df.groupby("agency_x").agg(
        total_upt=("upt","sum")
    ).reset_index()
    
    # % total columns
    initial_agg["pct_of_total_upt"] = (initial_agg["total_upt"]/initial_agg["total_upt"].sum())*100
    
    # cleaning data types and rounding
    initial_agg["total_upt"] = initial_agg["total_upt"].astype("int64")
    initial_agg["pct_of_total_upt"] = initial_agg["pct_of_total_upt"].round(decimals=2)
    cleaned_agg = initial_agg.sort_values(by="total_upt", ascending = False)
    
    return cleaned_agg

In [9]:
agency_cols = ["ntd_id", "agency_x", "RTPA"]

by_agency_long = sum_by_group(df, agency_cols)

In [11]:
# agg by agency, for pie chart
agency_agg_yr = group_by_agency(by_agency_long)

# total UPT check 
total_upt = by_agency_long["upt"].sum()
agency_count = by_agency_long["agency_x"].nunique()

In [34]:
display(
    by_agency_long.head(),
    agency_agg_yr.head()
)

Unnamed: 0,ntd_id,agency_x,RTPA,report_year,upt,vrm,vrh,previous_y_upt,change_1yr,pct_change_1yr
0,90003,San Francisco Bay Area Rapid Transit District,Metropolitan Transportation Commission,2022,38224072.0,78554914.0,2486029.0,0.0,0.0,100.0
1,90003,"San Francisco Bay Area Rapid Transit District,...",Metropolitan Transportation Commission,2023,50764402.0,85233749.0,2724074.0,38224072.0,12540330.0,24.7
2,90004,Golden Empire Transit District,Kern Council of Governments,2022,3201046.0,3758507.0,292325.0,0.0,0.0,100.0
3,90004,Golden Empire Transit District,Kern Council of Governments,2023,3293593.0,3924016.0,289338.0,3201046.0,92547.0,2.81
4,90006,Santa Cruz Metropolitan Transit District,Santa Cruz County Regional Transportation Comm...,2022,2837891.0,2952329.0,211715.0,0.0,0.0,100.0


Unnamed: 0,agency_x,total_upt,pct_of_total_upt
203,Los Angeles County Metropolitan Transportation...,530990571,34.45
20,"City and County of San Francisco, dba: San Fra...",241824003,15.69
245,San Diego Metropolitan Transit System,126128614,8.18
228,Orange County Transportation Authority,60428195,3.92
247,"San Francisco Bay Area Rapid Transit District,...",50764402,3.29


In [53]:
def make_line_chart(
    df: pd.DataFrame, 
    y_col: str,
    color_col: str,
) -> alt.Chart:
    df = df[df[y_col] > 0].dropna(subset = y_col)

    #x_label = [i for i in df.report_year.unique() if 
    #           any(substring in i for substring in 
    #               ["-01", "-06"])
    #          ]    
    
    chart = (alt.Chart(df)
         .mark_line(point=True)
         .encode(
             x = alt.X("report_year:O", 
                       #axis=alt.Axis(values = x_label), 
                       title = "Date"
                      ),
             y = alt.Y(
                 y_col, 
                 title = y_col
             ),
             color = alt.Color(color_col, title = "", 
                               scale = alt.Scale(
                                   range = cp.CALITP_CATEGORY_BRIGHT_COLORS + 
                                   cp.CALITP_CATEGORY_BOLD_COLORS
                               )),
             tooltip = ["report_year", y_col, color_col, "RTPA"]
         ).properties(width = WIDTH, height = HEIGHT)
         .facet(color_col, columns=2, title = "")
         .resolve_scale(y="independent")
    ).properties(
        title = f"{y_col} by {color_col}"
    ).interactive()
        
    return chart

In [48]:
def make_bar_chart(
    df: pd.DataFrame, 
    y_col: str,
    color_col: str,
) -> alt.Chart:
    
    def short_label(word):
        shorten_dict = {
            "change_1yr": "Change",
            "pct_change_1yr":"Change",
        }
        return shorten_dict[word]
    
    # For change column, we are missing everything prior to 2023 
    #df = df.dropna(subset = y_col)
    
    #need flag for y_col >,<, 0, missing?
    #count function to how many agencies fall in those categories, then look at those agencies
    #present table
    
    #x_label = [i for i in df.report_year.unique() if 
    #           any(substring in i for substring in 
    #               ["-01", "-03", "-06", "-09"])
    #          ]
    
    chart = (alt.Chart(df)
         .mark_bar()
         .encode(
             x = alt.X("report_year:O", 
                       #axis=alt.Axis(values = x_label), 
                       title = "Date"
                      ),
             y = alt.Y(
                 y_col, 
                 title = y_col
             ),
             color = alt.Color(color_col, title = "", 
                              scale = alt.Scale(
                                   range = cp.CALITP_CATEGORY_BRIGHT_COLORS + 
                                   cp.CALITP_CATEGORY_BOLD_COLORS
                              )),
             tooltip = ["report_year", y_col, color_col, "RTPA"]
         ).properties(width = WIDTH, height = HEIGHT)
         .facet(color_col, columns=2, title = "")
         .resolve_scale(x="shared", 
                        y="independent")
    ).properties(
        title = f"{y_col} by {color_col}"
    ).interactive()
    
    return chart

In [16]:
def make_pie_chart(df,
             col,
             color_col):
    
    pie = alt.Chart(df).mark_arc(radius = 150).encode(
        theta=col,
        color=color_col,
        tooltip = ["agency","total_upt","pct_of_total_upt"]
    ).properties(
        title=f"Total Unlinked Passenger Trips per Reporter in RTPA since {MIN_YEAR}",

    )

    return pie

In [17]:
#simple bar chart for total agencies and UPT
def total_upt_chart(df: pd.DataFrame, x_col:str, y_col:str,tool_tip:list):
    bar_chart = alt.Chart(df).mark_bar().encode(
    x=alt.X(x_col).sort("-y"),
    y=alt.Y(y_col),
    tooltip = tool_tip,
    color = alt.Color(x_col, title = "", 
                               scale = alt.Scale(
                                   range = cp.CALITP_CATEGORY_BRIGHT_COLORS + 
                                   cp.CALITP_CATEGORY_BOLD_COLORS
                               ))
    ).properties(
    title=f"Total Unlinked Passenger Trips per Reporter in RTPA since #####",
    width = WIDTH, 
    height = HEIGHT,
    ).resolve_scale(y="independent").interactive()
    
    return bar_chart

### Report Totals

In [18]:
Markdown(f"""
Within {rtpa}:
- Number of Reporters: <b>{agency_count}</b>.
- Total Unlinked Passenger Trips since the beginning of this report: <b>{total_upt:,}</b>.
- Individual Reporters ridership breakdown:
""")


Within Metropolitan Transportation Commission:
- Number of Reporters: <b>295</b>.
- Total Unlinked Passenger Trips since the beginning of this report: <b>1,541,210,691.0</b>.
- Individual Reporters ridership breakdown:


In [19]:
display(agency_agg_yr.reset_index(drop=True))

Unnamed: 0,agency_x,total_upt,pct_of_total_upt
0,Los Angeles County Metropolitan Transportation...,530990571,34.45
1,"City and County of San Francisco, dba: San Fra...",241824003,15.69
2,San Diego Metropolitan Transit System,126128614,8.18
3,Orange County Transportation Authority,60428195,3.92
4,"San Francisco Bay Area Rapid Transit District,...",50764402,3.29
...,...,...,...
290,North Fork Rancheria of Mono Indians of Califo...,602,0.00
291,North Fork Rancheria of Mono Indians of Califo...,484,0.00
292,Alpine County Local Transportation Commission,384,0.00
293,County of Alpine,325,0.00


In [49]:
tooltip_list = ["agency_x","total_upt","pct_of_total_upt"]

total_upt_chart(
    agency_agg_yr,
    x_col="agency_x", 
    y_col="total_upt", 
    tool_tip=tooltip_list)

### Reporter

In [52]:
#sanity check
df[df["agency_x"]=="Access Services"]

Unnamed: 0,report_year,ntd_id,agency_x,reporter_type_x,organization_type,city_x,state,primary_uza_name,vrh,vrm,upt,agency_y,reporter_type_y,city_y,RTPA,_merge,previous_y_upt,change_1yr,pct_change_1yr
102,2023,90157,Access Services,Full Reporter,Independent Public Agency or Authority of Tran...,El Monte,CA,"Los Angeles--Long Beach--Anaheim, CA",1929171.0,31969398.0,3545374.0,Access Services,Full Reporter,El Monte,Southern California Association of Governments,both,2927484.0,617890.0,0.1743
103,2022,90157,Access Services,Full Reporter,Independent Public Agency or Authority of Tran...,El Monte,CA,"Los Angeles--Long Beach--Anaheim, CA",1575462.0,28077169.0,2927484.0,Access Services,Full Reporter,El Monte,Southern California Association of Governments,both,,,


In [54]:
make_line_chart(
    df, 
    y_col = "upt", 
    color_col = "agency_x")


In [40]:
make_bar_chart(
    by_agency_long, 
    y_col = "change_1yr", 
    color_col = "agency_x")