In [1]:
%%capture
import sys
sys.path.append("../../bus_service_increase")
sys.path.append("../")
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp_data_analysis.magics
import pandas as pd
import annual_ridership_module

from IPython.display import display, HTML, Markdown

from bus_service_utils import chart_utils
from calitp_data_analysis import calitp_color_palette as cp
from update_vars import GCS_FILE_PATH, PUBLIC_FILENAME, YEAR, MONTH

#from shared_utils.rt_dates import MONTH_DICT

#alt.renderers.enable("html")
# Remove Altair's row cap (max_rows=None) on the default data transformer.
alt.data_transformers.enable('default', max_rows=None)

# Pixel size applied to each chart (or facet panel) via .properties(width=..., height=...).
WIDTH = 300
HEIGHT = 150

In [2]:
# parameters cell for local runs: the RTPA this report is built for.
# NOTE(review): presumably overridden when the notebook is run with parameters -- confirm.
rtpa = "Kern Council of Governments"

In [3]:
%%capture_parameters
rtpa

{"rtpa": "Kern Council of Governments"}


# {rtpa}
## Annual Ridership Trends

**Download data from our public [folder](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis)** by navigating to `ntd_annual_ridership` and selecting a file.

Transit operators/agencies that submit annual reports to NTD are included in this report. Reporters that were previously Urban full reporters, but are currently not, may appear. This may result in Reporters showing zero or partial ridership data in the report. 

If a Reporter is not a monthly reporter, or has not reported data since 2018, they will not appear in the report.

Examples: 
- **Reporter A** is an urban full reporter from 2019-2022, then became a reduced reporter for 2023. Reporter A's ridership data will be displayed for 2019-2022 only.
- **Reporter B** is an urban full reporter from 2000-2017, then became a reduced reporter for 2018. Reporter B will be named in the report, but will not display ridership data.
- **Reporter C** was a reduced reporter from 2015-2020, then became an urban full reporter and began submitting monthly ridership data to NTD for 2021. Reporter C's ridership data will be displayed for 2021-present. 

In [4]:
# Browser view of the public GCS bucket that holds the published file.
URL = ("https://console.cloud.google.com/storage/"
       "browser/calitp-publish-data-analysis"
      )

# Render a clickable download link; PUBLIC_FILENAME is imported from update_vars.
# The href value is double-quoted so the anchor stays well-formed even if the
# URL is later changed to one containing spaces or other special characters.
display(
    HTML(
        f"""
        <a href="{URL}">
        Download the latest month of data: {PUBLIC_FILENAME}</a>
        """
    )
)

In [5]:
# Read the annual ridership report data, keeping only rows whose RTPA column
# equals the `rtpa` parameter (the filter is passed to the parquet reader).
df = pd.read_parquet(
    f"{GCS_FILE_PATH}annual_ridership_report_data.parquet",
    filters=[("RTPA","==",rtpa)]
)

In [6]:
def sum_by_group(
    df: pd.DataFrame,
    group_cols: list) -> pd.DataFrame:
    """
    Total the ridership metrics for each `group_cols` + report_year combination.

    Because the data is long to begin with, this single function replaces the
    old sum_by_group, make_long and assemble_long_df functions.

    Args:
        df: long-format ridership rows containing `group_cols`, report_year,
            upt, vrm, vrh, previous_y_upt and change_1yr.
        group_cols: columns to group by; report_year is appended to them.

    Returns:
        One row per group and report_year with the summed metrics, plus
        pct_change_1yr as produced by
        annual_ridership_module.get_percent_change and multiplied by 100.
    """
    summed_cols = ["upt", "vrm", "vrh", "previous_y_upt", "change_1yr"]
    keys = group_cols + ["report_year"]

    totals = (
        df.groupby(keys)
        .agg(dict.fromkeys(summed_cols, "sum"))
        .reset_index()
    )

    # get %change back after the aggregation
    with_pct = annual_ridership_module.get_percent_change(totals)

    # decimal to whole number
    with_pct["pct_change_1yr"] = with_pct["pct_change_1yr"].mul(100)

    return with_pct

In [7]:
def group_by_agency(df):
    """
    Collapse the 'by_agency_long' df to one row per agency (agency_y).

    Sums upt for each agency and computes that agency's percent share of the
    overall upt. Intended as the input for the pie chart.

    Returns a frame with agency_y, total_upt (int64) and pct_of_total_upt
    (rounded to 2 decimals), ordered from the largest total_upt to the smallest.
    """
    per_agency = (
        df.groupby("agency_y")
        .agg(total_upt=("upt", "sum"))
        .reset_index()
    )

    # the share is worked out on the raw totals, before the int64 cast
    upt_totals = per_agency["total_upt"]
    pct_share = upt_totals / upt_totals.sum() * 100

    return per_agency.assign(
        total_upt=upt_totals.astype("int64"),
        pct_of_total_upt=pct_share.round(decimals=2),
    ).sort_values(by="total_upt", ascending=False)

In [8]:
#agency_y seems more consistent
# Grouping keys for the per-reporter roll-up; sum_by_group appends report_year to them.
agency_cols = ["ntd_id", "agency_y", "RTPA"]

by_agency_long = sum_by_group(df, agency_cols)

In [9]:
# agg by agency, for pie chart
agency_agg_yr = group_by_agency(by_agency_long)

# Report-level figures for the "Report Totals" summary:
# upt summed over every row of by_agency_long, and the number of distinct agency_y values.
total_upt = by_agency_long["upt"].sum()
agency_count = by_agency_long["agency_y"].nunique()

In [10]:
def make_line_chart(
    df: pd.DataFrame, 
    y_col: str,
    color_col: str,
) -> alt.Chart:
    """
    Draw `y_col` against report_year as small-multiple line charts.

    Rows whose `y_col` is missing or not greater than zero are left out.
    One panel is drawn per `color_col` value, two panels per row, each with
    its own y scale. `df` must also carry the report_year and RTPA columns,
    which are shown in the tooltip.
    """
    plot_df = df[df[y_col] > 0].dropna(subset = y_col)

    palette = cp.CALITP_CATEGORY_BRIGHT_COLORS + cp.CALITP_CATEGORY_BOLD_COLORS

    year_axis = alt.X("report_year:O", title = "Date")
    value_axis = alt.Y(y_col, title = y_col)
    series_color = alt.Color(
        color_col,
        title = "",
        scale = alt.Scale(range = palette),
    )

    # a single panel: line with point markers
    panel = (
        alt.Chart(plot_df)
        .mark_line(point=True)
        .encode(
            x = year_axis,
            y = value_axis,
            color = series_color,
            tooltip = ["report_year", y_col, color_col, "RTPA"],
        )
        .properties(width = WIDTH, height = HEIGHT)
    )

    # repeat the panel for every color_col value
    faceted = (
        panel
        .facet(color_col, columns=2, title = "")
        .resolve_scale(y="independent")
    )

    return faceted.properties(title = f"{y_col} by {color_col}").interactive()

In [11]:
def make_bar_chart(
    df: pd.DataFrame, 
    y_col: str,
    color_col: str,
) -> alt.Chart:
    """
    Build a faceted bar chart of `y_col` by report year.

    Args:
        df: data to plot; must contain report_year, `y_col`, `color_col`
            and RTPA (the last is only used in the tooltip).
        y_col: column plotted on the y axis; also used as the axis title.
        color_col: column used both for bar color and to split the chart
            into panels (two panels per row).

    Returns:
        The interactive faceted chart, titled "<y_col> by <color_col>",
        with a shared x scale and an independent y scale per panel.
    """
    # Rows with a missing y_col are deliberately kept (no dropna): per the
    # original author's note, the change column is missing for everything
    # prior to 2023.
    # TODO: flag y_col > 0, < 0, == 0 and missing; count how many agencies fall
    # in each category, look at those agencies, and present a table.

    palette = cp.CALITP_CATEGORY_BRIGHT_COLORS + cp.CALITP_CATEGORY_BOLD_COLORS

    chart = (alt.Chart(df)
         .mark_bar()
         .encode(
             x = alt.X("report_year:O", title = "Date"),
             y = alt.Y(y_col, title = y_col),
             color = alt.Color(color_col, title = "", 
                               scale = alt.Scale(range = palette)),
             tooltip = ["report_year", y_col, color_col, "RTPA"]
         ).properties(width = WIDTH, height = HEIGHT)
         .facet(color_col, columns=2, title = "")
         .resolve_scale(x="shared", 
                        y="independent")
    ).properties(
        title = f"{y_col} by {color_col}"
    ).interactive()
    
    return chart

In [12]:
def make_pie_chart(df,
             col,
             color_col,
             min_year=None):
    """
    Pie chart of `col` split by `color_col`.

    Args:
        df: aggregated data; must contain `col`, `color_col`, total_upt and
            pct_of_total_upt (the last two are shown in the tooltip).
        col: column encoded as the slice angle (theta).
        color_col: name of the category column used for slice color; it is
            also the first tooltip field.
        min_year: optional first year covered by the data. When given, the
            title ends with "since <min_year>".

    Returns:
        The Altair pie (arc) chart.
    """
    # The title used to read a notebook-level MIN_YEAR that this notebook never
    # defines, so calling the function raised NameError. Still honor MIN_YEAR
    # if the caller's notebook does define it, but don't require it.
    if min_year is None:
        min_year = globals().get("MIN_YEAR")

    title = "Total Unlinked Passenger Trips per Reporter in RTPA"
    if min_year is not None:
        title = f"{title} since {min_year}"

    # Tooltip leads with the category column actually passed in, instead of a
    # hard-coded "agency" column that the aggregated frame does not contain.
    pie = alt.Chart(df).mark_arc(radius = 150).encode(
        theta=col,
        color=color_col,
        tooltip = [color_col, "total_upt", "pct_of_total_upt"]
    ).properties(
        title=title,
    )

    return pie

In [13]:
#simple bar chart for total agencies and UPT
def total_upt_chart(df: pd.DataFrame, x_col:str, y_col:str,tool_tip:list, min_year=None):
    """
    Bar chart of `y_col` for each `x_col` value, bars sorted by descending y.

    Args:
        df: aggregated data containing `x_col`, `y_col` and every column
            named in `tool_tip`.
        x_col: category column for the x axis; also drives bar color.
        y_col: numeric column for bar height.
        tool_tip: columns shown on hover.
        min_year: optional first year covered by the data. When given, the
            title ends with "since <min_year>"; when omitted, the title has
            no "since" clause (previously it showed a literal "#####"
            placeholder).

    Returns:
        The interactive Altair bar chart.
    """
    title = "Total Unlinked Passenger Trips per Reporter in RTPA"
    if min_year is not None:
        title = f"{title} since {min_year}"

    palette = cp.CALITP_CATEGORY_BRIGHT_COLORS + cp.CALITP_CATEGORY_BOLD_COLORS

    bar_chart = alt.Chart(df).mark_bar().encode(
        x=alt.X(x_col).sort("-y"),
        y=alt.Y(y_col),
        tooltip = tool_tip,
        color = alt.Color(x_col, title = "", 
                          scale = alt.Scale(range = palette))
    ).properties(
        title=title,
        width = WIDTH, 
        height = HEIGHT,
    ).resolve_scale(y="independent").interactive()
    
    return bar_chart

### Report Totals

In [14]:
# total_upt is a float, so format it with thousands separators and no decimals;
# a bare "," format spec renders a trailing ".0" (e.g. "1,234.0").
Markdown(f"""
Within {rtpa}:
- Number of Reporters: <b>{agency_count}</b>.
- Total Unlinked Passenger Trips since the beginning of this report: <b>{total_upt:,.0f}</b>.
- Individual Reporters ridership breakdown:
""")


Within Kern Council of Governments:
- Number of Reporters: <b>11</b>.
- Total Unlinked Passenger Trips since the beginning of this report: <b>7,316,982.0</b>.
- Individual Reporters ridership breakdown:


In [15]:
# Show the per-reporter totals with a fresh 0..n-1 index.
display(agency_agg_yr.reset_index(drop=True))

Unnamed: 0,agency_y,total_upt,pct_of_total_upt
0,Golden Empire Transit District,6494639,88.76
1,Kern Regional Transit,414782,5.67
2,City of Delano,149685,2.05
3,City of Arvin,117719,1.61
4,City of Taft,47880,0.65
5,City of Shafter,32545,0.44
6,City of California City,20292,0.28
7,"City of Ridgecrest, dba: Ridgerunner",15306,0.21
8,City of Wasco,14594,0.2
9,"City of McFarland, dba: McFarland City Transit...",6189,0.08


In [16]:
# Columns shown on hover for each bar.
tooltip_list = ["agency_y","total_upt","pct_of_total_upt"]

# Bar chart of total UPT per reporter, from the per-agency aggregate.
total_upt_chart(
    agency_agg_yr,
    x_col="agency_y", 
    y_col="total_upt", 
    tool_tip=tooltip_list)

### Reporter

In [17]:
# Plot the per-reporter yearly totals (by_agency_long), the same frame the
# change bar chart uses, so each reporter's line has one point per report_year.
make_line_chart(
    by_agency_long, 
    y_col = "upt", 
    color_col = "agency_y")


In [18]:
# Bar chart of change_1yr by report year, one panel per reporter (agency_y).
make_bar_chart(
    by_agency_long, 
    y_col = "change_1yr", 
    color_col = "agency_y")