In [None]:
%%capture

import pandas as pd
import geopandas as gpd
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis.geography_utils import CA_NAD83Albers_m, WGS84
import calitp_data_analysis.magics
gcsgp = GCSGeoPandas()
from shared_utils import gtfs_utils_v2, rt_utils

import utils
import update_vars
import altair as alt
import numpy as np
import shapely
import branca

import importlib
importlib.reload(utils)
importlib.reload(update_vars)

In [None]:
import chart_utils

# Passenger Flow and Ridership Charts by Route and Origin/Destination

* Hues represent origin stops (and may repeat where there are many)
* Within each origin group, darker shades represent longer O/D pairs
* Currently based on April 2025 figures only

In [None]:
# Pull the shared run settings out of update_vars so later cells can use short names.
GCS_PATH = update_vars.GCS_PATH
analysis_date = update_vars.ANALYSIS_DATE

In [None]:
# Schedule feeds for the analysis date; the next cell filters this table by `name`.
feeds = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(
    analysis_date
)

In [None]:
# Keep only the row(s) for the Amtrak San Joaquins schedule feed.
sanj = feeds.query(
    'name == "Amtrak San Joaquins Schedule"'
)

In [None]:
# Shapes for the San Joaquins feed, reprojected to CA_NAD83Albers_m.
# length_meters is the geometry length in that CRS's units
# (presumably metres, going by the CRS constant's name).
sanj_shapes = (
    gtfs_utils_v2.get_shapes(
        analysis_date,
        operator_feeds=sanj.feed_key,
        shape_cols=update_vars.shape_cols,
    )
    .to_crs(CA_NAD83Albers_m)
    .assign(length_meters=lambda gdf: gdf.geometry.length)
)

In [None]:
# Trips for the San Joaquins feed, limited to route_type "3" (the GTFS code for bus).
sanj_trips = (
    gtfs_utils_v2.get_trips(
        analysis_date,
        operator_feeds=sanj.feed_key,
        trip_cols=update_vars.trip_cols,
    )
    .query('route_type == "3"')
)

In [None]:
# Stops for the San Joaquins feed, in the same projected CRS as the shapes.
sanj_stops = (
    gtfs_utils_v2.get_stops(
        analysis_date,
        operator_feeds=sanj.feed_key,
        stop_cols=update_vars.stop_cols,
    )
    .to_crs(CA_NAD83Albers_m)
)

In [None]:
# Stop times for the bus trips selected above, requested with get_df=True.
sanj_st = gtfs_utils_v2.get_stop_times(
    analysis_date,
    operator_feeds=sanj.feed_key,
    get_df=True,
    trip_df=sanj_trips,
    stop_time_cols=update_vars.stop_time_cols,
)

In [None]:
# Attach route, direction and shape identifiers from the trips table to every stop_time row.
trip_attribute_cols = [
    'trip_id', 'direction_id', 'route_id', 'route_short_name',
    'shape_array_key', 'shape_id',
]
bus_st = sanj_st.merge(sanj_trips[trip_attribute_cols], on='trip_id')

In [None]:
# Join each shape to the first trip row found for its shape_array_key,
# then drop the listed trip/feed columns.
one_trip_per_shape = sanj_trips.drop_duplicates(subset=['shape_array_key'])
dropped_cols = [
    'base64_url', 'regional_feed_type', 'block_id',
    'route_desc', 'agency_id', 'network_id',
    'route_key',
]
sanj_shapes_trip_info = (
    sanj_shapes
    .merge(one_trip_per_shape, on=['shape_array_key', 'feed_key'])
    .drop(columns=dropped_cols)
)

In [None]:
# Persist the shape/trip table as a dated intermediate parquet file.
shapes_trip_info_path = f'{update_vars.GCS_PATH}intermediate/sanj_shapes_trip_info_{analysis_date}.parquet'
gcsgp.geo_data_frame_to_parquet(sanj_shapes_trip_info, shapes_trip_info_path)

In [None]:
# Rows selected by utils.longest_by_route_dir; the route sections below filter this table by route_id.
# NOTE(review): presumably the longest shape per route and direction, going by the helper's name — confirm in utils.
sanj_long_shapes = utils.longest_by_route_dir(sanj_shapes_trip_info)

In [None]:
# The charts below use a single month of ridership for now (trip_year 2025, trip_month 4).
source_ridership = (
    utils.read_format_ridership()
    .query('trip_year == 2025 & trip_month == 4')
)

In [None]:
# Distinct stop_ids that appear in the bus stop_times.
bus_stops = sanj_st['stop_id'].unique()

In [None]:
def get_ridership_unique_stops(df):
    """Return the sorted, de-duplicated union of the `orig` and `dest` columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Ridership table exposing `orig` and `dest` columns.

    Returns
    -------
    numpy.ndarray
        Every value that appears in either column, once each, in sorted order
        (the result of ``np.union1d``).
    """
    # A named def (rather than a lambda bound to a name) gives a usable
    # __name__ in tracebacks and a place for this docstring.
    return np.union1d(df.orig, df.dest)

In [None]:
# Every stop code that appears as an origin or destination in the ridership data.
# NOTE(review): rider_stops is not referenced again in the visible cells.
rider_stops = get_ridership_unique_stops(source_ridership)

In [None]:
# Map each GTFS stop_id to the stop code used for ridership by dropping its
# first character; 'bLOS' is overridden to 'LAX'.
sanj_bus_to_rider_stops = {stop_id: stop_id[1:] for stop_id in bus_stops}
sanj_bus_to_rider_stops.update({'bLOS': 'LAX'})

In [None]:
# Add stop geometry and the ridership stop code to the bus stop_times, ordered
# by trip and stop sequence.
# This cell overwrites bus_st from itself. Dropping the two columns it adds
# (if present) keeps a re-run from merging `geometry` twice, which would
# otherwise produce suffixed geometry_x / geometry_y columns.
bus_st = (
    pd.merge(
        sanj_stops[['stop_id', 'geometry']],
        bus_st.drop(columns=['geometry', 'amtrak_stop'], errors='ignore'),
        on='stop_id',
    )
    .assign(amtrak_stop=lambda df: df.stop_id.map(sanj_bus_to_rider_stops))
    .sort_values(['trip_id', 'stop_sequence'])
)

# Focus Routes

## Route 1

In [None]:
# Rows of sanj_long_shapes for route_id "1"; the cells below index them with .iloc[0] and .iloc[1].
rt1_test = sanj_long_shapes.query('route_id == "1"')

In [None]:
# Identifying columns for the selected rows.
rt1_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 1A' ridership on the first rt1_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt1_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1A',
)

### By Travel Time

In [None]:
# 'Rt 1A' ridership on the first rt1_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt1_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1A',
    how='time',
)

## Route 1 - other direction

### By Distance Traveled

In [None]:
# 'Rt 1A' ridership on the second rt1_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt1_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1A',
)

### By Travel Time

In [None]:
# 'Rt 1A' ridership on the second rt1_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt1_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1A',
    how='time',
)

## Route 1b

In [None]:
# Route 1b needs its own shape rows: filtering on route_id "1" here (as the
# Route 1 section does) would re-plot the Route 1 shapes against 'Rt 1B'
# ridership in the cells below.
# NOTE(review): assumes the feed labels this route "1b" (lowercase, like "1c") — confirm.
rt1b_test = sanj_long_shapes.query('route_id == "1b"')

In [None]:
# Identifying columns for the selected rows.
rt1b_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 1B' ridership on the first rt1b_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt1b_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1B',
)

### By Travel Time

In [None]:
# 'Rt 1B' ridership on the first rt1b_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt1b_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1B',
    how='time',
)

## Route 1b - other direction

### By Distance Traveled

In [None]:
# 'Rt 1B' ridership on the second rt1b_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt1b_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1B',
)

### By Travel Time

In [None]:
# 'Rt 1B' ridership on the second rt1b_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt1b_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1B',
    how='time',
)

## Route 1C

In [None]:
# Rows of sanj_long_shapes for route_id "1c"; the cells below index them with .iloc[0] and .iloc[1].
rt1c_test = sanj_long_shapes.query('route_id == "1c"')

In [None]:
# Identifying columns for the selected rows.
rt1c_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 1C' ridership on the first rt1c_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt1c_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1C',
)

### By Travel Time

In [None]:
# 'Rt 1C' ridership on the first rt1c_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt1c_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1C',
    how='time',
)

## Route 1C - other direction

### By Distance Traveled

In [None]:
# 'Rt 1C' ridership on the second rt1c_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt1c_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1C',
)

### By Travel Time

In [None]:
# 'Rt 1C' ridership on the second rt1c_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt1c_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 1C',
    how='time',
)

## Route 19

In [None]:
# Rows of sanj_long_shapes for route_id "19"; the cells below index them with .iloc[0] and .iloc[1].
rt19_test = sanj_long_shapes.query('route_id == "19"')

In [None]:
# Identifying columns for the selected rows.
rt19_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 19' ridership on the first rt19_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt19_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 19',
)

### By Travel Time

In [None]:
# 'Rt 19' ridership on the first rt19_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt19_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 19',
    how='time',
)

## Route 19 - other direction

### By Distance Traveled

In [None]:
# 'Rt 19' ridership on the second rt19_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt19_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 19',
)

### By Travel Time

In [None]:
# 'Rt 19' ridership on the second rt19_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt19_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 19',
    how='time',
)

## Route 99

In [None]:
# Rows of sanj_long_shapes for route_id "99"; the cells below index them with .iloc[0] and .iloc[1].
rt99_test = sanj_long_shapes.query('route_id == "99"')

In [None]:
# Identifying columns for the selected rows.
rt99_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 99' ridership on the first rt99_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt99_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 99',
)

### By Travel Time

In [None]:
# 'Rt 99' ridership on the first rt99_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt99_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 99',
    how='time',
)

## Route 99 - other direction

### By Distance Traveled

In [None]:
# 'Rt 99' ridership on the second rt99_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt99_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 99',
)

### By Travel Time

In [None]:
# 'Rt 99' ridership on the second rt99_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt99_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 99',
    how='time',
)

# Other Routes

## Route 3

* Operated by RABA Chico - Redding, seems to create a data gap in that section

### By Distance Traveled

In [None]:
# Rows of sanj_long_shapes for route_id "3"; the cells below index them with .iloc[0] and .iloc[1].
rt3_test = sanj_long_shapes.query('route_id == "3"')

In [None]:
# Identifying columns for the selected rows.
rt3_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

In [None]:
# First rt3_test row; `how` is left at its default.
# NOTE(review): the ridership_data_route='Rt 03' argument is commented out in
# this call, so no ridership route is passed — confirm that is intentional.
chart_utils.flow_chart_from_shape_trip_row(
    rt3_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
)

### By Travel Time

In [None]:
# First rt3_test row, charted with how='time'.
# NOTE(review): the ridership_data_route='Rt 03' argument is commented out in
# this call, so no ridership route is passed — confirm that is intentional.
chart_utils.flow_chart_from_shape_trip_row(
    rt3_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    how='time',
)

## Route 3 - other direction

* Operated by RABA Chico - Redding, seems to create a data gap in that section

### By Distance Traveled

In [None]:
# Second rt3_test row; `how` is left at its default.
# NOTE(review): the ridership_data_route='Rt 03' argument is commented out in
# this call, so no ridership route is passed — confirm that is intentional.
chart_utils.flow_chart_from_shape_trip_row(
    rt3_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
)

### By Travel Time

In [None]:
# Second rt3_test row, charted with how='time'.
# NOTE(review): the ridership_data_route='Rt 03' argument is commented out in
# this call, so no ridership route is passed — confirm that is intentional.
chart_utils.flow_chart_from_shape_trip_row(
    rt3_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    how='time',
)

## Route 6

In [None]:
# Rows of sanj_long_shapes for route_id "6"; the cells below index them with .iloc[0] and .iloc[1].
rt6_test = sanj_long_shapes.query('route_id == "6"')

In [None]:
# Identifying columns for the selected rows.
rt6_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 06' ridership on the first rt6_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt6_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 06',
)

### By Travel Time

In [None]:
# 'Rt 06' ridership on the first rt6_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt6_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 06',
    how='time',
)

## Route 6 - other direction

### By Distance Traveled

In [None]:
# 'Rt 06' ridership on the second rt6_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt6_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 06',
)

### By Travel Time

In [None]:
# 'Rt 06' ridership on the second rt6_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt6_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 06',
    how='time',
)

## Route 7

In [None]:
# Rows of sanj_long_shapes for route_id "7"; the cells below index them with .iloc[0] and .iloc[1].
rt7_test = sanj_long_shapes.query('route_id == "7"')

In [None]:
# Identifying columns for the selected rows.
rt7_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 07' ridership on the first rt7_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt7_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 07',
)

### By Travel Time

In [None]:
# 'Rt 07' ridership on the first rt7_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt7_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 07',
    how='time',
)

## Route 7 - other direction

### By Distance Traveled

In [None]:
# 'Rt 07' ridership on the second rt7_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt7_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 07',
)

### By Travel Time

In [None]:
# 'Rt 07' ridership on the second rt7_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt7_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 07',
    how='time',
)

## Route 10

* Seems to function as two halves with train connection in the middle, but a few passengers can and do ride through (for example Santa Barbara - Las Vegas)

In [None]:
# Rows of sanj_long_shapes for route_id "10"; the cells below index them with .iloc[0] and .iloc[1].
rt10_test = sanj_long_shapes.query('route_id == "10"')

In [None]:
# Identifying columns for the selected rows.
rt10_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 10' ridership on the first rt10_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt10_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 10',
)

### By Travel Time

In [None]:
# 'Rt 10' ridership on the first rt10_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt10_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 10',
    how='time',
)

## Route 10 - other direction

### By Distance Traveled

In [None]:
# 'Rt 10' ridership on the second rt10_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt10_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 10',
)

### By Travel Time

In [None]:
# 'Rt 10' ridership on the second rt10_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt10_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 10',
    how='time',
)

## Route 15a (YARTS)

* **TODO:** routes 15b and 15 (both also YARTS) currently hit a bug and are not charted here; to be investigated later

In [None]:
# Rows of sanj_long_shapes for route_id "15a"; the cells below index them with .iloc[0] and .iloc[1].
rt15_test = sanj_long_shapes.query('route_id == "15a"')

In [None]:
# Identifying columns for the selected rows.
rt15_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 15' ridership on the first rt15_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt15_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 15',
)

### By Travel Time

In [None]:
# 'Rt 15' ridership on the first rt15_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt15_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 15',
    how='time',
)

## Route 15a (YARTS) - other direction

### By Distance Traveled

In [None]:
# 'Rt 15' ridership on the second rt15_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt15_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 15',
)

### By Travel Time

In [None]:
# 'Rt 15' ridership on the second rt15_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt15_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 15',
    how='time',
)

## Route 18

In [None]:
# Rows of sanj_long_shapes for route_id "18"; the cells below index them with .iloc[0] and .iloc[1].
rt18_test = sanj_long_shapes.query('route_id == "18"')

In [None]:
# Identifying columns for the selected rows.
rt18_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 18' ridership on the first rt18_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt18_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 18',
)

### By Travel Time

In [None]:
# 'Rt 18' ridership on the first rt18_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt18_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 18',
    how='time',
)

## Route 18 - other direction

### By Distance Traveled

In [None]:
# 'Rt 18' ridership on the second rt18_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt18_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 18',
)

### By Travel Time

In [None]:
# 'Rt 18' ridership on the second rt18_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt18_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 18',
    how='time',
)

## Route 20

In [None]:
# Rows of sanj_long_shapes for route_id "20"; the cells below index them with .iloc[0] and .iloc[1].
rt20_test = sanj_long_shapes.query('route_id == "20"')

In [None]:
# Identifying columns for the selected rows.
rt20_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 20' ridership on the first rt20_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt20_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

### By Travel Time

In [None]:
# 'Rt 20' ridership on the first rt20_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt20_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
    how='time',
)

## Route 20 - other direction

### By Distance Traveled

In [None]:
# 'Rt 20' ridership on the second rt20_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt20_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

### By Travel Time

In [None]:
# 'Rt 20' ridership on the second rt20_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt20_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
    how='time',
)

## Route 20c

In [None]:
# Rows of sanj_long_shapes for route_id "20c"; the cells below index them with .iloc[0] and .iloc[1].
# NOTE(review): the charts below pass ridership_data_route='Rt 20', the same value as the Route 20 section — confirm that is intended.
rt20c_test = sanj_long_shapes.query('route_id == "20c"')

In [None]:
# Identifying columns for the selected rows.
rt20c_test[
    ['name', 'route_short_name', 'route_long_name', 'shape_id']
]

### By Distance Traveled

In [None]:
# 'Rt 20' ridership on the first rt20c_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt20c_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

### By Travel Time

In [None]:
# 'Rt 20' ridership on the first rt20c_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt20c_test.iloc[0],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
    how='time',
)

## Route 20c - other direction

### By Distance Traveled

In [None]:
# 'Rt 20' ridership on the second rt20c_test row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    rt20c_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
)

### By Travel Time

In [None]:
# 'Rt 20' ridership on the second rt20c_test row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    rt20c_test.iloc[1],
    stop_times=bus_st,
    ridership=source_ridership,
    ridership_data_route='Rt 20',
    how='time',
)

## Route 17

In [None]:
# Read remix.parquet from the source_data folder under GCS_PATH.
remix_path = f'{GCS_PATH}source_data/remix.parquet'
remix = gcsgp.read_parquet(remix_path)

In [None]:

# Pick the Route 17 and Route 39 lines out of the Remix table and attach a
# hard-coded trip_id to each selected row (two ids per line, so each query is
# expected to return exactly two rows).

# Route 17: inbound is northbound.
line_17 = (
    remix
    .query('line_name == "17 EMY-SBA Thruway"')
    .assign(trip_id=['205369', '205375'])
)

# Route 39, "Long (IND)" pattern: iloc 0 is WB, iloc 1 is EB.
line_39 = (
    remix
    .query('line_name == "Route 39 - Fullerton - Palm Springs - Indio" & pattern == "Long (IND)"')
    .assign(trip_id=['201578', '201581'])
)

In [None]:
# Read surfliner_national_st.parquet from the intermediate folder under GCS_PATH.
surf_st_path = f'{GCS_PATH}intermediate/surfliner_national_st.parquet'
surf_st = gcsgp.read_parquet(surf_st_path)

In [None]:
# Stop times for each manually named route; amtrak_stop is a copy of stop_id,
# so these frames carry the same amtrak_stop column as bus_st.
line_17_st = (
    surf_st
    .query('manual_route_name == "Santa Barbara - Oakland"')
    .assign(amtrak_stop=lambda df: df.stop_id)
)
line_39_st = (
    surf_st
    .query('manual_route_name == "Fullerton - Indio"')
    .assign(amtrak_stop=lambda df: df.stop_id)
)

### By Distance Traveled

In [None]:
# 'Rt 17' ridership on the first line_17 row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    line_17.iloc[0],
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
)

### By Travel Time

In [None]:
# 'Rt 17' ridership on the first line_17 row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    line_17.iloc[0],
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
    how='time',
)

## Route 17 - other direction

### By Distance Traveled

In [None]:
# 'Rt 17' ridership on the second line_17 row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    line_17.iloc[1],
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
)

### By Travel Time

In [None]:
# 'Rt 17' ridership on the second line_17 row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    line_17.iloc[1],
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
    how='time',
)

## Route 39

* 1/day Fullerton - Palm Springs
* 1/day Fullerton - Palm Springs - Indio

### By Distance Traveled

In [None]:
# 'Rt 39' ridership on the first line_39 row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    line_39.iloc[0],
    stop_times=line_39_st,
    ridership=source_ridership,
    ridership_data_route='Rt 39',
)

### By Travel Time

In [None]:
# 'Rt 39' ridership on the first line_39 row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    line_39.iloc[0],
    stop_times=line_39_st,
    ridership=source_ridership,
    ridership_data_route='Rt 39',
    how='time',
)

## Route 39 - other direction

### By Distance Traveled

In [None]:
# 'Rt 39' ridership on the second line_39 row; `how` is left at its default.
chart_utils.flow_chart_from_shape_trip_row(
    line_39.iloc[1],
    stop_times=line_39_st,
    ridership=source_ridership,
    ridership_data_route='Rt 39',
)

### By Travel Time

In [None]:
# 'Rt 39' ridership on the second line_39 row, charted with how='time'.
chart_utils.flow_chart_from_shape_trip_row(
    line_39.iloc[1],
    stop_times=line_39_st,
    ridership=source_ridership,
    ridership_data_route='Rt 39',
    how='time',
)