In [None]:
import pandas as pd
import update_vars
import utils

import altair as alt
import datetime as dt

from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
# Single GCSGeoPandas instance; reused further down for gcsgp.read_parquet(...).
gcsgp = GCSGeoPandas()

In [None]:
# Load the ridership table through the project helper.
# The cells below rely on it exposing the columns route_short_name, trip_month,
# trip_year, ridership and revenue.
ridership = utils.read_format_ridership()

# Time Series Ridership Info

* Ridership data sourced from Amtrak-provided Origin-Destination Ridership and Revenue
    * Amtrak Route 56 is combined into Route 6 (Central Valley - San Jose)

In [None]:
# Total ridership and revenue per route, month and year.
# The grouping key is route_short_name (an earlier revision grouped on 'ca_bus_route').
monthly = (
    ridership
    .groupby(['route_short_name', 'trip_month', 'trip_year'])[['ridership', 'revenue']]
    .sum()
    .reset_index()
)

In [None]:
def date_from_row(row):
    """Return the first day of the row's trip month as a ``datetime.date``.

    Reads the ``trip_year`` and ``trip_month`` attributes of ``row``.
    """
    return dt.date(year=row.trip_year, month=row.trip_month, day=1)

In [None]:
# Add a first-of-month datetime column (used as the x axis of both charts).
# pd.to_datetime replaces .astype('datetime64'): pandas 2.x rejects the unit-less
# 'datetime64' dtype with a TypeError, while to_datetime yields a datetime64
# column on both old and new pandas versions.
monthly = monthly.assign(date=pd.to_datetime(monthly.apply(date_from_row, axis=1)))

In [None]:
# Read the shapes/trip-info parquet for update_vars.ANALYSIS_DATE from the
# "intermediate/" folder under update_vars.GCS_PATH.
shape_df = gcsgp.read_parquet(f'{update_vars.GCS_PATH}intermediate/sanj_shapes_trip_info_{update_vars.ANALYSIS_DATE}.parquet')

In [None]:
# Keep only the distinct (route_short_name, route_long_name) pairs; this is the
# lookup used to attach long names to the monthly table.
shape_df = (
    shape_df[['route_short_name', 'route_long_name']]
    .drop_duplicates()
)

In [None]:
# Attach route_long_name to every monthly row; the left join keeps routes that
# have no match in shape_df.
# NOTE(review): if a route_short_name maps to more than one route_long_name,
# this join multiplies the monthly rows for that route — confirm the lookup is unique.
monthly = pd.merge(
    monthly,
    shape_df,
    how='left',
    on='route_short_name',
)

## Trend by route

* Mouse over points for additional info; scroll to zoom and drag to pan the chart
* Shift-click routes in the legend to select or deselect them for highlighting
* updated with LOSSAN route

In [None]:
# Point selection bound to the legend: picking routes there drives the opacity
# condition below (selected routes at full opacity, the rest faded to 0.2).
selection = alt.selection_point(fields=['route_short_name'], bind='legend')

# Monthly ridership per route as a line with point markers; tooltips show
# ridership, revenue, date and the long route name.
alt.Chart(monthly).mark_line(point=True).encode(
    x=alt.X('date:T', axis=alt.Axis(format="%Y %b")),
    y='ridership:Q',
    color='route_short_name:N',
    tooltip=[
        'route_short_name',
        alt.Tooltip('ridership:Q', format=','),
        alt.Tooltip('revenue:Q', format=','),
        'date',
        'route_long_name',
    ],
    opacity=alt.when(selection).then(alt.value(1)).otherwise(alt.value(0.2)),
).properties(
    width=800,
    height=400,
).interactive().add_params(selection)

In [None]:
def pct_change_rider_revenue(df):
    """Year-over-year fractional change in ridership and revenue.

    Intended to be applied per route (one group of a ``groupby``).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a datetime ``date`` column plus ``ridership`` and
        ``revenue`` columns. Dates are expected to be unique within ``df``
        (one row per month); duplicate dates cannot be aligned and are
        expected to raise.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``date`` (sorted ascending) with columns ``ridership`` and
        ``revenue`` holding ``value / value_one_year_earlier - 1``. Rows with
        no observation dated exactly one year earlier are NaN.
    """
    by_date = df.set_index(['date']).sort_index()[['ridership', 'revenue']]
    # Shift by one calendar year instead of 12 rows: a positional shift only
    # lines up with "same month last year" when no month is missing, and would
    # otherwise silently compare against the wrong month.
    return by_date.pct_change(freq=pd.DateOffset(years=1))

In [None]:
# Apply the year-over-year helper to each route separately, so changes are
# computed within a route's own time series and never across routes.
# The result is indexed by the route group key and the helper's date index.
df = monthly.groupby(['route_short_name']).apply(pct_change_rider_revenue)

In [None]:
# Flatten the index back into columns, drop rows containing NaN (months with no
# year-over-year value) and round the numeric columns to two decimals.
yoy = (
    df.reset_index()
    .dropna()
    .round(2)
)

In [None]:
# Rename the change columns so they do not collide with the absolute
# ridership/revenue columns merged in from `monthly` afterwards.
yoy = yoy.rename(
    columns={
        'ridership': 'ridership_yoy_change',
        'revenue': 'revenue_yoy_change',
    }
)

In [None]:
# Bring the absolute monthly figures (and other monthly columns) back alongside
# the year-over-year changes, matching on route and month.
yoy = pd.merge(
    yoy,
    monthly,
    on=['route_short_name', 'date'],
)

## Year-over-year change

* Bubbles above the dashed zero line show a positive year-over-year change
* The size of each bubble indicates absolute ridership
* Mouse over points for additional info; scroll to zoom and drag to pan the chart
* Shift-click routes in the legend to select or deselect them for highlighting

In [None]:
# Base chart over the year-over-year table; shared by both layers below.
chart = alt.Chart(yoy)

# One bubble per route and month: y is the year-over-year ridership change,
# bubble size is absolute ridership. Reuses the legend-bound `selection`
# parameter defined for the trend chart to fade unselected routes.
points = chart.mark_point().encode(
    x=alt.X('date:T', axis=alt.Axis(format="%Y %b")),
    y='ridership_yoy_change:Q',
    color='route_short_name:N',
    size='ridership:Q',
    tooltip=[
        'route_short_name',
        alt.Tooltip('ridership_yoy_change:Q', format='.0%'),
        alt.Tooltip('revenue_yoy_change:Q', format='.0%'),
        'date',
        alt.Tooltip('ridership:Q', format=','),
        alt.Tooltip('revenue:Q', format=','),
        'route_long_name',
    ],
    opacity=alt.when(selection).then(alt.value(1)).otherwise(alt.value(0.2)),
).properties(
    width=800,
    height=400,
).interactive().add_params(selection)

# Dashed horizontal reference line at y = 0 (no change versus the prior year).
rule = chart.mark_rule(strokeDash=[2, 2]).encode(y=alt.datum(0))

# Layer the reference line over the bubbles; last expression renders the chart.
points + rule