In [None]:
%%capture

import pandas as pd
import geopandas as gpd
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis.geography_utils import CA_NAD83Albers_m, WGS84
import calitp_data_analysis.magics
gcsgp = GCSGeoPandas()
from shared_utils import gtfs_utils_v2, rt_utils

import utils
import update_vars
import altair as alt
import numpy as np
import shapely
import branca

import importlib
importlib.reload(utils)
importlib.reload(update_vars)

In [None]:
import chart_utils

# Reload on import, like utils and update_vars, so every chart in the notebook
# is drawn with the same (current) version of chart_utils. Assigning the result
# keeps the module repr out of the cell output.
chart_utils = importlib.reload(chart_utils)

# Passenger Flow and Ridership Charts by Route and Origin/Destination

* Hues represent origin stops (and may repeat where there are many)
* Within each origin group, darker shades represent longer O/D pairs
* Currently based on April 2025 figures only

In [None]:
# Service date used for every GTFS pull and for the intermediate parquet name below;
# configured in update_vars.
analysis_date = update_vars.ANALYSIS_DATE

In [None]:
# Feed lookup table for analysis_date; the cells below rely on its `name` and
# `feed_key` fields.
feeds = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(analysis_date)

In [None]:
# Keep only the "Amtrak San Joaquins Schedule" feed; its feed_key is passed to
# every gtfs_utils_v2 getter below.
sanj = feeds.query('name == "Amtrak San Joaquins Schedule"')

In [None]:
# Pull the feed's shapes, project them to CA_NAD83Albers_m and store each
# shape's length in that projected CRS (metres, going by the CRS and column names).
sanj_shapes = (
    gtfs_utils_v2.get_shapes(
        analysis_date,
        operator_feeds=sanj.feed_key,
        shape_cols=update_vars.shape_cols,
    )
    .to_crs(CA_NAD83Albers_m)
    .assign(length_meters=lambda shapes: shapes.geometry.length)
)

In [None]:
# Trips for the feed, restricted to route_type "3" (bus in the GTFS route_type enumeration).
sanj_trips = (
    gtfs_utils_v2.get_trips(
        analysis_date,
        operator_feeds=sanj.feed_key,
        trip_cols=update_vars.trip_cols,
    )
    .query('route_type == "3"')
)

In [None]:
# Stops for the feed, projected to CA_NAD83Albers_m.
sanj_stops = gtfs_utils_v2.get_stops(
    analysis_date,
    operator_feeds=sanj.feed_key,
    stop_cols=update_vars.stop_cols,
).to_crs(CA_NAD83Albers_m)

In [None]:
# Stop times for the feed; the bus trips selected above are handed in as trip_df.
# NOTE(review): get_df=True presumably requests an in-memory frame - confirm in gtfs_utils_v2.
sanj_st = gtfs_utils_v2.get_stop_times(
    analysis_date,
    operator_feeds=sanj.feed_key,
    get_df=True,
    trip_df=sanj_trips,
    stop_time_cols=update_vars.stop_time_cols,
)

In [None]:
# Attach direction, route and shape identifiers from the trips table to each
# stop_time row (joined on trip_id).
bus_st = sanj_st.merge(
    sanj_trips[
        [
            'trip_id',
            'direction_id',
            'route_id',
            'route_short_name',
            'shape_array_key',
            'shape_id',
        ]
    ],
    on='trip_id',
)

In [None]:
# Give every shape the attributes of one trip that uses it (first trip kept per
# shape_array_key), then drop the columns listed below.
sanj_shapes_trip_info = (
    sanj_shapes
    .merge(
        sanj_trips.drop_duplicates(subset=['shape_array_key']),
        on=['shape_array_key', 'feed_key'],
    )
    .drop(
        columns=[
            'base64_url',
            'regional_feed_type',
            'block_id',
            'route_desc',
            'agency_id',
            'network_id',
            'route_key',
        ]
    )
)

In [None]:
# Persist the shape + trip attributes as a dated intermediate parquet under GCS_PATH.
gcsgp.geo_data_frame_to_parquet(
    sanj_shapes_trip_info,
    f'{update_vars.GCS_PATH}intermediate/sanj_shapes_trip_info_{analysis_date}.parquet',
)

In [None]:
# Reduce the shapes via utils.longest_by_route_dir (by its name, the longest shape
# per route and direction - confirm in utils). Each route section below charts
# rows 0 and 1 of its route's selection from this table.
sanj_long_shapes = utils.longest_by_route_dir(sanj_shapes_trip_info)

In [None]:
# Ridership period charted in this notebook (April 2025 only for now).
RIDERSHIP_YEAR = 2025
RIDERSHIP_MONTH = 4

# Ridership route labels that do not translate directly to a GTFS route_short_name.
rider_to_gtfs_dict = {'Route 1A': 'Route 1', 'Route 1B': 'Route 1', 'Route 1C': 'Route 1c',
                     'Route 20 - B': 'Route 20', 'Route 3R': 'Route 3'}


def strip_zero(route_str):
    """Strip leading zeros from each space-separated token, e.g. 'Route 06' -> 'Route 6'."""
    return ' '.join(token.lstrip('0') for token in route_str.split(' '))


def rider_to_gtfs(route_str):
    """Translate a ridership route label to its GTFS name; unmapped labels pass through unchanged."""
    return rider_to_gtfs_dict.get(route_str, route_str)


source_ridership = (
    pd.read_excel(update_vars.RIDERSHIP_PATH)
    .query('trip_year == @RIDERSHIP_YEAR and trip_month == @RIDERSHIP_MONTH')  ## TODO trends, etc
    .assign(
        # Origin/destination pair label, e.g. 'AAA->BBB'.
        od=lambda df: df.orig + '->' + df.dest,
        # 'Rt 06' -> 'Route 06' -> 'Route 6' -> GTFS route_short_name.
        # na_action='ignore' lets a missing route label stay missing instead of
        # raising inside strip_zero.
        route_short_name=lambda df: (
            df.ca_bus_route.str.replace('Rt', 'Route', regex=False)
            .map(strip_zero, na_action='ignore')
            .map(rider_to_gtfs, na_action='ignore')
        ),
    )
)

In [None]:
# Distinct GTFS stop_ids that appear in the bus stop_times.
bus_stops = sanj_st['stop_id'].unique()

In [None]:
def get_ridership_unique_stops(df):
    """Return the sorted, de-duplicated union of a ridership table's `orig` and `dest` values.

    df: any object exposing `orig` and `dest` array-likes (e.g. the ridership DataFrame).
    Returns a 1-D numpy array, as produced by np.union1d.
    """
    return np.union1d(df.orig, df.dest)

In [None]:
# Every station code that appears as an origin or destination in the filtered ridership data.
rider_stops = get_ridership_unique_stops(source_ridership)

In [None]:
# GTFS stop_id -> ridership station code: the code is the stop_id minus its
# first character.
sanj_bus_to_rider_stops = {stop_id: stop_id[1:] for stop_id in bus_stops}
# Explicit override: 'bLOS' is mapped to 'LAX' rather than 'LOS'.
sanj_bus_to_rider_stops.update({'bLOS': 'LAX'})

In [None]:
# Attach stop geometry and the ridership station code to every stop_time row,
# ordered by trip and stop sequence.
# Any geometry column left on bus_st by a previous run of this cell is dropped
# first, so re-running the cell does not produce geometry_x / geometry_y.
bus_st = (
    pd.merge(
        sanj_stops[['stop_id', 'geometry']],
        bus_st.drop(columns=['geometry'], errors='ignore'),
        on='stop_id',
    )
    .assign(amtrak_stop=lambda st: st.stop_id.map(sanj_bus_to_rider_stops))
    .sort_values(['trip_id', 'stop_sequence'])
)

In [None]:
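# Optional sanity check (disabled): how many GTFS-derived station codes appear in the ridership data?
# pd.Series(sanj_bus_to_rider_stops.values()).isin(pd.Series(rider_stops)).value_counts()

# Station codes present in GTFS but absent from the ridership data:
# gtfs_no_rider_data = pd.Series(sanj_bus_to_rider_stops.values())[~pd.Series(sanj_bus_to_rider_stops.values()).isin(pd.Series(rider_stops))]
# gtfs_no_rider_data  # OK -- every unmatched stop is either served by RABA/YARTS or outside CA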

## Route 1

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "1"; charted below against ridership route 'Rt 1A'.
rt1_test = sanj_long_shapes.query('route_id == "1"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt1_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "1" selection, using ridership route 'Rt 1A'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt1_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1A',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "1" selection, using ridership route 'Rt 1A'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt1_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1A',
)

In [None]:
chart

## Route 1b

In [None]:
# Same GTFS route_id "1" selection as in the Route 1 section: rider_to_gtfs_dict maps
# both ridership routes 1A and 1B to GTFS 'Route 1'. Charted below against 'Rt 1B'.
rt1b_test = sanj_long_shapes.query('route_id == "1"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt1b_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "1" selection, using ridership route 'Rt 1B'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt1b_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1B',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "1" selection, using ridership route 'Rt 1B'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt1b_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1B',
)

In [None]:
chart

## Route 1C

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "1c"; charted below against ridership route 'Rt 1C'.
rt1c_test = sanj_long_shapes.query('route_id == "1c"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt1c_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "1c" selection, using ridership route 'Rt 1C'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt1c_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1C',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "1c" selection, using ridership route 'Rt 1C'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt1c_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1C',
)

In [None]:
chart

## Route 3

* Operated by RABA between Chico and Redding, which seems to create a data gap in that section

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "3"; charted below without an explicit ridership route.
rt3_test = sanj_long_shapes.query('route_id == "3"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt3_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "3" selection.
# NOTE(review): ridership_data_route='Rt 03' is left commented out, so no explicit
# ridership route is passed here - confirm that is intended.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt3_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    # ridership_data_route='Rt 03'
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "3" selection.
# NOTE(review): ridership_data_route='Rt 03' is left commented out, so no explicit
# ridership route is passed here - confirm that is intended.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt3_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    # ridership_data_route='Rt 03'
)

In [None]:
chart

## Route 6

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "6"; charted below against ridership route 'Rt 06'.
rt6_test = sanj_long_shapes.query('route_id == "6"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt6_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Development aid: re-import and reload chart_utils so that edits made to the
# module while this kernel is running are picked up by the cells below.
# NOTE(review): charts rendered above this cell used whichever version of
# chart_utils was loaded at the time they ran.
import chart_utils
importlib.reload(chart_utils)

In [None]:
# Flow chart for row 0 of the route_id "6" selection, using ridership route 'Rt 06'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt6_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 06',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "6" selection, using ridership route 'Rt 06'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt6_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 06',
)

In [None]:
chart

## Route 7

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "7"; charted below against ridership route 'Rt 07'.
rt7_test = sanj_long_shapes.query('route_id == "7"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt7_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "7" selection, using ridership route 'Rt 07'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt7_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 07',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "7" selection, using ridership route 'Rt 07'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt7_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 07',
)

In [None]:
chart

## Route 10

* Seems to function as two halves with train connection in the middle, but a few passengers can and do ride through (for example Santa Barbara - Las Vegas)

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "10"; charted below against ridership route 'Rt 10'.
rt10_test = sanj_long_shapes.query('route_id == "10"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt10_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "10" selection, using ridership route 'Rt 10'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt10_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 10',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "10" selection, using ridership route 'Rt 10'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt10_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 10',
)

In [None]:
chart

## Route 15a (YARTS)

* **TODO:** there is some sort of bug when charting 15b and 15 (both also YARTS); will check later

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "15a"; charted below against ridership route 'Rt 15'.
rt15_test = sanj_long_shapes.query('route_id == "15a"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt15_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "15a" selection, using ridership route 'Rt 15'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt15_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 15',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "15a" selection, using ridership route 'Rt 15'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt15_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 15',
)

In [None]:
chart

## Route 18

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "18"; charted below against ridership route 'Rt 18'.
rt18_test = sanj_long_shapes.query('route_id == "18"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt18_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "18" selection, using ridership route 'Rt 18'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt18_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 18',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "18" selection, using ridership route 'Rt 18'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt18_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 18',
)

In [None]:
chart

## Route 19

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "19"; charted below against ridership route 'Rt 19'.
rt19_test = sanj_long_shapes.query('route_id == "19"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt19_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "19" selection, using ridership route 'Rt 19'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt19_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 19',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "19" selection, using ridership route 'Rt 19'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt19_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 19',
)

In [None]:
chart

## Route 20

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "20"; charted below against ridership route 'Rt 20'.
rt20_test = sanj_long_shapes.query('route_id == "20"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt20_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "20" selection, using ridership route 'Rt 20'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt20_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "20" selection, using ridership route 'Rt 20'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt20_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

In [None]:
chart

## Route 20c

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "20c"; charted below against ridership
# route 'Rt 20', the same ridership route used in the Route 20 section.
rt20c_test = sanj_long_shapes.query('route_id == "20c"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt20c_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "20c" selection, using ridership route 'Rt 20'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt20c_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "20c" selection, using ridership route 'Rt 20'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt20c_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

In [None]:
chart

## Route 99

In [None]:
# Rows of sanj_long_shapes for GTFS route_id "99"; charted below against ridership route 'Rt 99'.
rt99_test = sanj_long_shapes.query('route_id == "99"')

In [None]:
# Inspect the feed name, route names and shape_id of the selected rows before charting.
rt99_test[['name', 'route_short_name', 'route_long_name', 'shape_id']]

In [None]:
# Flow chart for row 0 of the route_id "99" selection, using ridership route 'Rt 99'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt99_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 99',
)

In [None]:
chart

In [None]:
# Flow chart for row 1 of the route_id "99" selection, using ridership route 'Rt 99'.
chart = chart_utils.flow_chart_from_shape_trip_row(
    rt99_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 99',
)

In [None]:
chart