In [1]:
import sys
sys.path.append("../bus_service_increase")

In [2]:
%%capture
import warnings
warnings.filterwarnings('ignore')

import altair as alt
import calitp_data_analysis.magics
import pandas as pd

from IPython.display import display, HTML

from bus_service_utils import chart_utils
from calitp_data_analysis import calitp_color_palette as cp
from update_vars import GCS_FILE_PATH, PUBLIC_FILENAME, YEAR, MONTH
#from monthly_ridership_by_rtpa import get_percent_change

#alt.renderers.enable("html")
alt.data_transformers.enable('default', max_rows=None)

In [3]:
# parameters cell for local
rtpa = "Sacramento Area Council of Governments"

In [4]:
%%capture_parameters
rtpa

{"rtpa": "Sacramento Area Council of Governments"}


# {rtpa}
## Monthly Ridership Trends

**Download data from our public [folder](https://console.cloud.google.com/storage/browser/calitp-publish-data-analysis)** by navigating to `ntd_monthly_ridership` and selecting a file.

In [5]:
URL = ("https://console.cloud.google.com/storage/"
       "browser/calitp-publish-data-analysis"
      )

display(
    HTML(
        f"""
        <a href={URL}>
        Download the latest month of data: {PUBLIC_FILENAME}</a>
        """
    )
)


In [6]:
df = pd.read_parquet(
    f"{GCS_FILE_PATH}ca_monthly_ridership_{YEAR}_{MONTH}.parquet",
    filters = [[("RTPA", "==", rtpa)]]
).drop(
    columns = ["Mode", "TOS"]
).rename(columns = {"Mode_full": "Mode", "TOS_full": "TOS"})

In [7]:
# find columns that are recent enough to plot
MIN_YEAR = 2018

not_id_cols = [c for c in df.columns if "/" in c]

recent_years = [
    c for c in not_id_cols if int(c.split("/")[1]) >= MIN_YEAR and 
    "pct" not in c
]

upt_cols = [
    c for c in recent_years if "change" not in c
]

change_cols = [c for c in recent_years if "change" in c]

In [8]:
def sum_by_group(df: pd.DataFrame, group_cols: list) -> pd.DataFrame:
    """
    Since df is wide, use pivot_table() to sum up all
    the columns that show UPT.
    """
    grouped_df = df.pivot_table(
        index = group_cols, 
        values = recent_years, 
        aggfunc="sum"
    ).reset_index().reindex(columns = group_cols + recent_years)
    
    return grouped_df

def make_long(df: pd.DataFrame, group_cols: list, value_cols: list):
    df_long = df[group_cols + value_cols].melt(
        id_vars = group_cols, 
        value_vars = value_cols,
    )
    
    df_long = df_long.assign(
        variable = df_long.variable.str.replace("change_1yr_", "")
    )
    
    return df_long

In [9]:
agency_cols = ["NTD ID", "Agency", "RTPA"]
mode_cols = ["Mode", "RTPA"]
tos_cols = ["TOS", "RTPA"]

by_agency = sum_by_group(df, agency_cols)
by_mode = sum_by_group(df, mode_cols)
by_tos = sum_by_group(df, tos_cols)

In [10]:
def assemble_long_df(df: pd.DataFrame, group_cols: list) -> pd.DataFrame:
    """
    Need df to be long to make chart.
    Let's put raw UPT and change side-by-side.
    """
    df_raw = make_long(df, group_cols, upt_cols).rename(
        columns = {"value": "upt"})
    df_change = make_long(df, group_cols, change_cols).rename(
        columns = {"value": "change_1yr"})

    final = pd.merge(
        df_raw,
        df_change,
        on = group_cols + ["variable"],
        how = "left"
    )
    
    final = final.assign(
        year = final.variable.str.split("/", expand=True)[1],
        month = final.variable.str.split("/", expand=True)[0].str.zfill(2)
    )
    
    final = final.assign(
        year_month = final.year + "-" + final.month
    )
    
    return final

In [11]:
by_agency_long = assemble_long_df(by_agency, agency_cols)
by_mode_long = assemble_long_df(by_mode, mode_cols)
by_tos_long = assemble_long_df(by_tos, tos_cols)

In [12]:
LABELING_DICT = {
    "upt": "Unlinked Passenger Trips",
    "change_1yr": "Change in Unlinked Passenger Trips from Prior Year",
    "TOS": "Type of Service",
    "year_month": "Date"
}

def labeling(word: str) -> str:
    return chart_utils.labeling(word, LABELING_DICT)

WIDTH = 300
HEIGHT = 150

In [13]:
def make_line_chart(
    df: pd.DataFrame, 
    y_col: str,
    color_col: str,
) -> alt.Chart:
    df = df[df[y_col] > 0].dropna(subset = y_col)

    x_label = [i for i in df.year_month.unique() if 
               any(substring in i for substring in 
                   ["-01", "-06"])
              ]    
    chart = (alt.Chart(df)
         .mark_line()
         .encode(
             x = alt.X("year_month:O", 
                       axis=alt.Axis(values = x_label), 
                       title = "Date"
                      ),
             y = alt.Y(y_col, title = labeling(y_col)),
             color = alt.Color(color_col, title = "", 
                               scale = alt.Scale(
                                   range = cp.CALITP_CATEGORY_BRIGHT_COLORS + 
                                   cp.CALITP_CATEGORY_BOLD_COLORS
                               )),
             tooltip = ["year_month", y_col, color_col, "RTPA"]
         ).properties(width = WIDTH, height = HEIGHT)
         .facet(color_col, columns=2, title = "")
         .resolve_scale(y="independent")
    ).properties(
        title = f"{labeling(y_col)} by {labeling(color_col)}"
    ).interactive()
        
    return chart

In [14]:
# WORKING CELL!!
#from initial df, filter for agencies that had UPT data for the past 12 months

#columns to drop, all the inbetween year 
drop_year_month = list(df.iloc[:,8:261].columns)

#df of recent year data
df.drop(columns=drop_year_month)

# if sum of month/year cols =0 then drop. 

Unnamed: 0,NTD ID,Legacy NTD ID,Agency,Status,Reporter Type,UACE CD,UZA Name,3 Mode,2/2023,3/2023,...,1/2024,2/2024,RTPA,_merge,change_1yr_1/2024,pct_change_1yr_1/2024,change_1yr_2/2024,pct_change_1yr_2/2024,Mode,TOS
59,90019,9019,Sacramento Regional Transit District,Active,Full Reporter,77068.0,"Sacramento, CA",Bus,35196.0,39553.0,...,38350.0,37824.0,Sacramento Area Council of Governments,both,4528.0,0.1181,2628.0,0.0695,Demand Response,Directly Operated
60,90019,9019,Sacramento Regional Transit District,Inactive,Full Reporter,77068.0,"Sacramento, CA",Bus,0.0,0.0,...,0.0,0.0,Sacramento Area Council of Governments,both,0.0,,0.0,,Demand Response,Purchased Transportation
61,90019,9019,Sacramento Regional Transit District,Active,Full Reporter,77068.0,"Sacramento, CA",Bus,5072.0,6229.0,...,7428.0,7794.0,Sacramento Area Council of Governments,both,2538.0,0.3417,2722.0,0.3492,Demand Response,Purchased Transportation - Transportation Netw...
62,90019,9019,Sacramento Regional Transit District,Active,Full Reporter,77068.0,"Sacramento, CA",Rail,472251.0,539858.0,...,550184.0,551037.0,Sacramento Area Council of Governments,both,53919.0,0.098,78786.0,0.143,Light Rail,Directly Operated
63,90019,9019,Sacramento Regional Transit District,Active,Full Reporter,77068.0,"Sacramento, CA",Bus,617943.0,699600.0,...,741731.0,764457.0,Sacramento Area Council of Governments,both,162343.0,0.2189,146514.0,0.1917,Bus,Directly Operated
137,90061,9061,Yuba-Sutter Transit Authority,Active,Small Systems Reporter,97939.0,"Yuba City, CA",Bus,0.0,0.0,...,0.0,0.0,Sacramento Area Council of Governments,both,0.0,,0.0,,Commuter Bus,Purchased Transportation
138,90061,9061,Yuba-Sutter Transit Authority,Active,Small Systems Reporter,97939.0,"Yuba City, CA",Bus,0.0,0.0,...,0.0,0.0,Sacramento Area Council of Governments,both,0.0,,0.0,,Demand Response,Purchased Transportation
139,90061,9061,Yuba-Sutter Transit Authority,Active,Small Systems Reporter,97939.0,"Yuba City, CA",Bus,0.0,0.0,...,0.0,0.0,Sacramento Area Council of Governments,both,0.0,,0.0,,Bus,Purchased Transportation
163,90090,9090,Yolo County Transportation District,Active,Full Reporter,77068.0,"Sacramento, CA",Bus,2512.0,2839.0,...,4338.0,4406.0,Sacramento Area Council of Governments,both,1819.0,0.4193,1894.0,0.4299,Demand Response,Purchased Transportation
164,90090,9090,Yolo County Transportation District,Active,Full Reporter,77068.0,"Sacramento, CA",Bus,52790.0,55647.0,...,46985.0,47798.0,Sacramento Area Council of Governments,both,-5604.0,-0.1193,-4992.0,-0.1044,Bus,Purchased Transportation


In [15]:
recent_agency_data_list

NameError: name 'recent_agency_data_list' is not defined

In [21]:
def make_bar_chart(
    df: pd.DataFrame, 
    y_col: str,
    color_col: str,
) -> alt.Chart:
    
    def short_label(word):
        shorten_dict = {
            "change_1yr": "Change",
        }
        return shorten_dict[word]
    
    # For change column, we are missing everything prior to 2023 
    df = df[df[y_col] != 0].dropna(subset = y_col)
    
    #need flag for y_col >,<, 0, missing?
    #count function to how many agencies fall in those categories, then look at those agencies
    #present table
    
    x_label = [i for i in df.year_month.unique() if 
               any(substring in i for substring in 
                   ["-01", "-03", "-06", "-09"])
              ]
    
    chart = (alt.Chart(df)
         .mark_bar()
         .encode(
             x = alt.X("year_month:O", 
                       axis=alt.Axis(values = x_label), 
                       title = "Date"
                      ),
             y = alt.Y(y_col, title = short_label(y_col)),
             color = alt.Color(color_col, title = "", 
                              scale = alt.Scale(
                                   range = cp.CALITP_CATEGORY_BRIGHT_COLORS + 
                                   cp.CALITP_CATEGORY_BOLD_COLORS
                              )),
             tooltip = ["year_month", y_col, color_col, "RTPA"]
         ).properties(width = WIDTH, height = HEIGHT)
         .facet(color_col, columns=2, title = "")
         .resolve_scale(x="shared", 
                        y="independent")
    ).properties(
        title = f"{labeling(y_col)} by {labeling(color_col)}"
    ).interactive()
    
    return chart

### Transit Agency

In [None]:
make_line_chart(by_agency_long, y_col = "upt", color_col = "Agency")

In [23]:
by_agency_long

Unnamed: 0,NTD ID,Agency,RTPA,variable,upt,change_1yr,year,month,year_month
0,90019,Sacramento Regional Transit District,Sacramento Area Council of Governments,1/2018,1624666.0,,2018,01,2018-01
1,90061,Yuba-Sutter Transit Authority,Sacramento Area Council of Governments,1/2018,81831.0,,2018,01,2018-01
2,90090,Yolo County Transportation District,Sacramento Area Council of Governments,1/2018,104939.0,,2018,01,2018-01
3,90142,"University of California, Davis",Sacramento Area Council of Governments,1/2018,431420.0,,2018,01,2018-01
4,90168,City of Roseville,Sacramento Area Council of Governments,1/2018,0.0,,2018,01,2018-01
...,...,...,...,...,...,...,...,...,...
883,90216,County of Sacramento Municipal Services Agency...,Sacramento Area Council of Governments,2/2024,0.0,0.0,2024,02,2024-02
884,90220,City of Folsom,Sacramento Area Council of Governments,2/2024,0.0,0.0,2024,02,2024-02
885,90223,"Paratransit, Inc.",Sacramento Area Council of Governments,2/2024,4216.0,413.0,2024,02,2024-02
886,90224,"Paratransit, Inc. CTSA",Sacramento Area Council of Governments,2/2024,0.0,0.0,2024,02,2024-02


In [22]:
make_bar_chart(by_agency_long, y_col = "change_1yr", color_col = "Agency")

### Transit Mode

In [None]:
make_line_chart(by_mode_long, y_col = "upt", color_col = "Mode")

In [None]:
make_bar_chart(by_mode_long, y_col = "change_1yr", color_col = "Mode")

### Type of Service

In [None]:
make_line_chart(by_tos_long, y_col = "upt", color_col = "TOS")

In [None]:
make_bar_chart(by_tos_long, y_col = "change_1yr", color_col = "TOS")