In [None]:
import pandas as pd
import update_vars
import utils

import altair as alt
import datetime as dt

from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
# Reader object from calitp_data_analysis.gcs_geopandas; used below via gcsgp.read_parquet to load route shapes.
gcsgp = GCSGeoPandas()

In [None]:
# Ridership and revenue records from the project helper; aggregated below by route_short_name, trip_month and trip_year.
ridership = utils.read_format_ridership()

# Time Series Cost Info

* Revenue data sourced from Amtrak-provided Origin-Destination Ridership and Revenue
    * Amtrak Route 56 is combined into Route 6 (Central Valley - San Jose)
* Cost data by route provided by CCJPA and SJJPA, costs for Route 99 reflect a sum of both JPA portions
    * Route 18 (Central Valley - Santa Maria) is excluded: its cost data has many gaps and may not reflect the full cost.
    * Route 15 (YARTS) is included, but ridership may be an undercount since the Amtrak source does not reflect YARTS-only riders.

In [None]:
# Load the route-level cost workbook from the project storage path.
costs_path = f"{update_vars.GCS_PATH}source_data/combined_costs.xlsx"
combined_costs = pd.read_excel(costs_path)

In [None]:
# Lookup from the `route` values in the cost data to route_short_name.
# Keys mix str and int; any route value not listed here maps to NaN when applied with Series.map.
cost_route_to_route_short_name = {
    '1A': 'Route 1',
    '1B': 'Route 1',
    '1C': 'Route 1c',
    99: 'Route 99',
    3: 'Route 3',
    7: 'Route 7',
    6: 'Route 6',
    '15A/15B': 'Route 15',
}

In [None]:
# Lowercase every header, then rename the date columns to the trip_month / trip_year
# names used by the grouping keys further down.
combined_costs = (
    combined_costs
    .rename(columns=lambda name: name.lower())
    .rename(columns={'month': 'trip_month', 'year': 'trip_year'})
)

In [None]:
# Attach route_short_name from the lookup; route values missing from the lookup become NaN.
route_names = combined_costs['route'].map(cost_route_to_route_short_name)
combined_costs = combined_costs.assign(route_short_name=route_names)

In [None]:
# Exclude Route 99 for 6/2025 only, since we only have CCJPA data for that month.
# The year must be part of the condition; without it June of every year is dropped for Route 99.
combined_costs = combined_costs.query(
    'route != 99 or trip_month != 6 or trip_year != 2025'
)

In [None]:
# Keys shared by the cost and ridership aggregations and by the merge between them.
group_cols = [
    'route_short_name',
    'trip_month',
    'trip_year',
]

In [None]:
# Total cost per route and month (rows whose route_short_name is NaN drop out of the groupby).
monthly_costs = (
    combined_costs
    .groupby(group_cols)[['cost']]
    .sum()
    .reset_index()
)

In [None]:
# Monthly ridership and revenue totals per route, keyed by the same columns as the cost totals.
monthly = (
    ridership
    .groupby(group_cols)[['ridership', 'revenue']]
    .sum()
    .reset_index()
)

In [None]:
# Inner join: keep only route-months that have both ridership/revenue and cost figures.
monthly = pd.merge(monthly, monthly_costs, how='inner', on=group_cols)

In [None]:
def date_from_row(row):
    """Return the first day of the month given by row.trip_year and row.trip_month."""
    return dt.date(year=row.trip_year, month=row.trip_month, day=1)

In [None]:
# Build a first-of-month timestamp for each row directly from the year/month columns.
# pd.to_datetime assembles dates from columns named year/month/day, which avoids both the
# row-wise apply and the unit-less 'datetime64' cast that pandas 2.x rejects.
monthly = monthly.assign(
    date=pd.to_datetime(
        monthly[['trip_year', 'trip_month']]
        .rename(columns={'trip_year': 'year', 'trip_month': 'month'})
        .assign(day=1)
    )
)

In [None]:
# Route shapes / trip info for the configured analysis date.
shapes_path = f'{update_vars.GCS_PATH}intermediate/sanj_shapes_trip_info_{update_vars.ANALYSIS_DATE}.parquet'
shape_df = gcsgp.read_parquet(shapes_path)

In [None]:
# Keep only the distinct short-name / long-name pairs; nothing else from the shapes is used below.
route_name_cols = ['route_short_name', 'route_long_name']
shape_df = shape_df[route_name_cols].drop_duplicates()

In [None]:
# Add route_long_name for tooltips; the left join keeps routes that have no match in shape_df.
# NOTE(review): a short name with several long names would duplicate monthly rows here - confirm the pairs are unique.
monthly = pd.merge(monthly, shape_df, how='left', on='route_short_name')

In [None]:
# Net cost is cost minus revenue.
monthly = monthly.assign(net_cost=lambda frame: frame['cost'] - frame['revenue'])

In [None]:
def trend_chart(df, col):
    """Build an interactive line chart of one metric over time, one line per route.

    Args:
        df: DataFrame to plot. The encodings read the `date`, `route_short_name`
            and `route_long_name` columns plus the column named by `col`.
        col: Name of the quantitative column for the y axis. The axis title is
            this name with underscores replaced by spaces, title-cased.

    Returns:
        The Altair chart, with a legend-bound selection that fades unselected routes
        and axis font sizes configured.
    """
    title = col.replace('_', ' ').title()
    # Selecting routes in the legend keeps them opaque and fades the rest.
    selection = alt.selection_point(fields=['route_short_name'], bind='legend')
    chart = (
        # Plot the frame that was passed in rather than the notebook-level `monthly`.
        alt.Chart(df)
        .mark_line(point=True)
        .encode(
            x=alt.X('date:T', axis=alt.Axis(format="%Y %b")).title('Date'),
            y=alt.Y(f'{col}:Q').title(title),
            color=alt.Color(
                'route_short_name:N',
                legend=alt.Legend(symbolLimit=0, labelFontSize=12, titleFontSize=14),
            ),
            # The metric is listed once; the original repeated the same tooltip entry twice.
            tooltip=[
                'route_short_name',
                alt.Tooltip(f'{col}:Q', format=','),
                'date',
                'route_long_name',
            ],
            opacity=alt.when(selection).then(alt.value(1)).otherwise(alt.value(0.2)),
        )
        .properties(width=800, height=400)
        .interactive()
    ).add_params(
        selection
    )
    chart = chart.configure_axis(labelFontSize=14, titleFontSize=16)
    return chart

## Net Cost Trend by Route

* can mouse over points for additional info, scroll, and zoom chart
* shift-click routes in the legend to select or deselect for highlight

In [None]:
# Net cost (cost minus revenue) per route over time.
trend_chart(df=monthly, col='net_cost')

## Cost Trend by Route

In [None]:
# Cost per route over time.
trend_chart(df=monthly, col='cost')

## Revenue Trend by Route

In [None]:
# Revenue per route over time.
trend_chart(df=monthly, col='revenue')

## Net Cost Per Rider Trend by Route

In [None]:
# Net cost divided by ridership for each route-month.
monthly = monthly.assign(
    net_cost_per_rider=lambda frame: frame['net_cost'] / frame['ridership']
)

In [None]:
# Net cost per rider per route over time.
trend_chart(df=monthly, col='net_cost_per_rider')