In [None]:
# External and shared libraries used throughout the notebook.
import pandas as pd
import geopandas as gpd
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis.geography_utils import CA_NAD83Albers_m, WGS84
# Reader instance used below to load the `ca_boundary` parquet from GCS.
gcsgp = GCSGeoPandas()
from shared_utils import gtfs_utils_v2
from shared_utils.rt_utils import show_full_df

# Modules imported by bare name (presumably local to this analysis directory).
# NOTE(review): `chart_utils` and `importlib` are imported further down, in
# the Visualizations section, rather than in this cell.
import utils
from update_vars import ANALYSIS_DATE, shape_cols, trip_cols, stop_cols, stop_time_cols
# Lowercase alias passed to every gtfs_utils_v2 call in this notebook.
# Note that `stop_time_cols` is rebound to a different list in a later cell.
analysis_date = ANALYSIS_DATE

# Explore Thruway Bus (and state-supported rail) data availability

In [None]:
# Load the `ca_boundary` parquet from GCS; it is reprojected and used to clip
# the Amtrak national-feed stops later in the notebook.
ca = gcsgp.read_parquet(
    'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/ca_boundary.parquet'
)

In [None]:
# Feed lookup for the analysis date. Later cells filter it on `name` and pass
# the matching `feed_key` values to the gtfs_utils_v2 get_* helpers.
feeds = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(analysis_date)

## New SJJPA San Joaquins feed

In [None]:
# SQL selecting rows of dim_gtfs_service_data where `_is_current` is true.
# NOTE(review): LIMIT is applied without ORDER BY, so which 1000 rows come back
# is not determined by this query; the name filter applied to the result in a
# later cell can miss the target feed if the table holds more than 1000 current
# rows. TODO confirm table size, or filter on the name inside the SQL.
query = '''
SELECT * from cal-itp-data-infra.mart_transit_database.dim_gtfs_service_data
WHERE _is_current
LIMIT 1000
'''

In [None]:
# Run the SQL defined above; the result is filtered by name in the next cell.
df = query_sql(query)

In [None]:
# Rows of the warehouse result whose `name` contains "Amtrak San J".
# NOTE(review): `sanj` is rebound from `feeds` two cells below, so this value
# is only displayed and never feeds into the rest of the analysis.
sanj = df.query('name.str.contains("Amtrak San J")')

In [None]:
sanj

In [None]:
# San Joaquins rows of the daily feed lookup; `sanj.feed_key` selects the feed
# in the get_shapes / get_trips calls below. This replaces the `sanj` value
# derived from the warehouse query above.
sanj = feeds.query('name.str.contains("Amtrak San Joaquins")')
sanj

In [None]:
# Shapes for the San Joaquins feed on the analysis date.
# NOTE(review): the column list is read from `utils.shape_cols` here, while the
# Pacific Surfliner section uses `shape_cols` imported from update_vars --
# confirm both refer to the same list.
sanj_shapes = gtfs_utils_v2.get_shapes(
    analysis_date,
    operator_feeds=sanj.feed_key,
    shape_cols=utils.shape_cols,
)

In [None]:
# NOTE(review): self-assignment with no effect -- looks like the remnant of a
# removed transformation; this cell can be deleted.
sanj_shapes = sanj_shapes

In [None]:
sanj_shapes

In [None]:
# Trips for the San Joaquins feed on the analysis date.
# NOTE(review): uses `utils.trip_cols`, whereas the Pacific Surfliner section
# uses `trip_cols` imported from update_vars -- confirm they match.
sanj_trips = gtfs_utils_v2.get_trips(
    analysis_date,
    operator_feeds=sanj.feed_key,
    trip_cols=utils.trip_cols,
)

In [None]:
# sanj_trips.query('route_type == "2"')

In [None]:
# Number of trip instances per (route_short_name, route_long_name) pair.
(
    sanj_trips[['route_short_name', 'route_long_name', 'trip_instance_key']]
    .groupby(['route_short_name', 'route_long_name'])
    .count()
)

In [None]:
# Attach trip attributes to each shape. Trips are first reduced to one row per
# `shape_array_key` so the merge does not repeat a shape once per trip.
sanj_shapes_trip_info = sanj_shapes.merge(
    sanj_trips.drop_duplicates(subset=['shape_array_key']),
    on=['shape_array_key', 'feed_key'],
)

In [None]:
# sanj_shapes_trip_info.drop(columns=['service_date']).explore(column='route_long_name')

In [None]:
# Inspect the trips whose route_short_name is "Route 99".
sanj_trips.query('route_short_name == "Route 99"')

## Capitol Corridor

In [None]:
# Feed lookup rows whose `name` contains "Capitol"; `cc_feed.feed_key` selects
# the feed in the get_shapes / get_trips calls below.
cc_feed = feeds.query('name.str.contains("Capitol")')

In [None]:
# Shapes for the Capitol Corridor feed on the analysis date.
# NOTE(review): reads `utils.shape_cols`; the Pacific Surfliner section uses
# `shape_cols` imported from update_vars -- confirm they match.
cc_shapes = gtfs_utils_v2.get_shapes(
    analysis_date,
    operator_feeds=cc_feed.feed_key,
    shape_cols=utils.shape_cols,
)

In [None]:
# Trips for the Capitol Corridor feed on the analysis date.
# NOTE(review): reads `utils.trip_cols`; the Pacific Surfliner section uses
# `trip_cols` imported from update_vars -- confirm they match.
cc_trips = gtfs_utils_v2.get_trips(
    analysis_date,
    operator_feeds=cc_feed.feed_key,
    trip_cols=utils.trip_cols,
)

In [None]:
# Number of trip instances per (route_short_name, route_long_name) pair.
(
    cc_trips[['route_short_name', 'route_long_name', 'trip_instance_key']]
    .groupby(['route_short_name', 'route_long_name'])
    .count()
)

In [None]:
# Attach trip attributes to each shape. Trips are first reduced to one row per
# `shape_array_key` so the merge does not repeat a shape once per trip.
cc_shapes_trip_info = cc_shapes.merge(
    cc_trips.drop_duplicates(subset=['shape_array_key']),
    on=['shape_array_key', 'feed_key'],
)

In [None]:
# cc_shapes_trip_info.drop(columns=['service_date']).explore(column='route_long_name')

## Pacific Surfliner (via Amtrak National feed)

In [None]:
# Feed lookup rows whose `name` contains "Amtrak Schedule"; per the section
# heading this is the Amtrak national feed used to find Surfliner thruway trips.
surf_feed = feeds.query('name.str.contains("Amtrak Schedule")')

In [None]:
surf_feed

In [None]:
# Shapes for the Amtrak national feed, using the column list imported from
# update_vars.
surf_shapes = gtfs_utils_v2.get_shapes(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    shape_cols=shape_cols,
)

In [None]:
# Stops for the Amtrak national feed; clipped to the CA boundary in the next
# cell.
surf_stops = gtfs_utils_v2.get_stops(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    stop_cols=stop_cols,
)

In [None]:
# Put the boundary and the stops in the same projected CRS, then keep only the
# stops that fall inside the boundary.
ca = ca.to_crs(CA_NAD83Albers_m)
surf_stops = (
    surf_stops
    .to_crs(CA_NAD83Albers_m)
    .clip(ca)
)

In [None]:
# All trips in the Amtrak national feed; narrowed to thruway connecting
# service a few cells further down.
surf_trips = gtfs_utils_v2.get_trips(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    trip_cols=trip_cols,
)

In [None]:
# Stop-time columns requested for the Amtrak national feed.
# NOTE(review): this rebinds `stop_time_cols`, replacing the list imported from
# update_vars in the first cell for the remainder of the notebook.
stop_time_cols = [
    'feed_key',
    'trip_id',
    'stop_id',
    'stop_sequence',
    'arrival_time',
    'departure_time',
    'arrival_sec',
    'departure_sec',
]

In [None]:
# Stop times for the Amtrak national feed. `surf_trips` is still the unfiltered
# trip table at this point; the thruway filter is applied in the next cell.
surf_st = gtfs_utils_v2.get_stop_times(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    get_df=True,
    trip_df=surf_trips,
    stop_time_cols=stop_time_cols,
)

In [None]:
# Keep only trips on the "Amtrak Thruway Connecting Service" route. The name
# `surf_trips` is reused, so the unfiltered trip table is no longer available
# after this cell runs.
surf_trips = surf_trips.query('route_long_name == "Amtrak Thruway Connecting Service"')

In [None]:
# Thruway trips -> their stop times -> the California-clipped stops. Both
# merges use pandas' default inner join, so stop times at stops outside the
# clipped set are dropped.
surf_merged = (
    surf_trips
    .merge(surf_st, on=['feed_key', 'trip_id'])
    .merge(surf_stops, on=['feed_key', 'stop_id'])
)

### CA Thruway in Amtrak National Feed:

#### `route_id` 41080

* ['Chico Amtrak', 'Red Bluff Amtrak Bus Stop',
       'Redding Amtrak Bus Stop']

#### `route_id` 42896

* ['Bakersfield', 'Uc Santa Barbara Amtrak Bus Stop', 'Santa Barbara',
       'Las Vegas', 'Las Vegas Amtrak Bus Stop',
       'Barstow Amtrak Bus Stop', 'Barstow', 'Mojave Amtrak Bus Stop',
       'Tehachapi Amtrak Bus Stop', 'Fillmore Amtrak Bus Stop',
       'Santa Paula Amtrak Bus Stop', 'Oxnard', 'Ventura Amtrak',
       'Carpinteria Amtrak']

#### `route_id` 42954

* strange one, nationwide stops including CA points

#### `route_id` not that useful, look for trip patterns...



In [None]:
# One frozenset of stop names per trip_id. Sets ignore order, so trips that
# serve the same stops compare equal whatever their direction or sequence,
# and frozensets are hashable so they can be counted with value_counts().
trip_id_stops = (
    surf_merged
    .groupby('trip_id')['stop_name']
    .unique()
    .map(frozenset)
)

In [None]:
trip_id_stops

In [None]:
# Index: distinct stop-name sets; values: number of trips with that set.
# The positional lookups in the following cells index into this result.
trip_patterns = trip_id_stops.value_counts() # count unique trip patterns...

In [None]:
trip_patterns

### Surfliner-associated Trip Patterns

https://www.pacificsurfliner.com/plan-your-trip/connections/bus-connections/

In [None]:
# Stop set picked out as the Santa Barbara - Oakland pattern (see the
# route-name mapping below).
# NOTE(review): positional lookup into the value_counts() result -- the
# position of a given pattern depends on the trip counts for this analysis
# date, so re-check the displayed set whenever the date or feed changes.
sb_oakland = trip_patterns.index[2]
sb_oakland

In [None]:
# Stop set picked out as the Fullerton - Indio pattern.
# NOTE(review): positional lookup into the value_counts() result -- the
# position depends on the trip counts for this analysis date; re-check the
# displayed set whenever the date or feed changes.
fullerton_indio = trip_patterns.index[21]

In [None]:
fullerton_indio

In [None]:
# Stop set picked out as the Fullerton - Palm Springs pattern.
# NOTE(review): positional lookup into the value_counts() result -- the
# position depends on the trip counts for this analysis date; re-check the
# displayed set whenever the date or feed changes.
fullerton_palm_springs = trip_patterns.index[24]

In [None]:
fullerton_palm_springs

In [None]:
# Hand-assigned route names, keyed by the stop-set patterns selected above.
surfliner_routes_from_trip_patterns = {
    fullerton_palm_springs: 'Fullerton - Palm Springs',
    fullerton_indio: 'Fullerton - Indio',
    sb_oakland: 'Santa Barbara - Oakland',
}

In [None]:
# One row per trip_id with its stop-set pattern and, where the pattern is one
# of the hand-named Surfliner patterns, the manual route name (None otherwise).
trips_with_pattern = (
    trip_id_stops
    .reset_index()
    .rename(columns={'stop_name': 'trip_pattern'})
    .assign(
        manual_route_name=lambda frame: frame.trip_pattern.map(
            surfliner_routes_from_trip_patterns.get
        )
    )
)

In [None]:
# Keep only trips whose stop pattern matched one of the Surfliner-associated
# routes, i.e. rows with a manual route name.
# The null check is limited to `manual_route_name`: the next cell merges the
# trip columns into this same variable, and a bare dropna() re-run after that
# merge would also discard matched trips that have a null in any trip column.
trips_with_pattern = trips_with_pattern.dropna(subset=['manual_route_name']) #  trip_id for Surfliner-associated routes...
trips_with_pattern

In [None]:
# Add the thruway trip attributes to the pattern-matched trips (pandas default
# inner join on trip_id).
# NOTE(review): rebinding the same name means running this cell twice merges
# the trip columns in a second time, producing suffixed duplicate columns.
trips_with_pattern = trips_with_pattern.merge(surf_trips, on='trip_id')

In [None]:
trips_with_pattern

In [None]:
# Trip instances per (route_long_name, route_id) among the matched trips.
#  unhelpful that these all have the same route_id!
(
    trips_with_pattern[['route_id', 'route_long_name', 'trip_instance_key']]
    .groupby(['route_long_name', 'route_id'])
    .count()
)

In [None]:
# Stops -> stop times -> pattern-matched trips. Starting from `surf_stops`
# keeps the stop columns on the left-hand side of the merge chain.
surf_st_merged = (
    surf_stops
    .merge(surf_st, on=['feed_key', 'stop_id'])
    .merge(trips_with_pattern, on=['feed_key', 'trip_id'])
)

## A derived timetable of Pacific Surfliner-associated thruways in the Amtrak Nationwide feed

Daily Trips:

* 1 Indio - Fullerton
* 1 Fullerton - Indio
* 1 Fullerton - Palm Springs Airport
* 1 Palm Springs Airport - Fullerton
* 4 Oakland - Santa Barbara
* 4 Santa Barbara - Oakland

In [None]:
# show_full_df(utils.format_stop_times(surf_st_merged))

## Visualizations

In [None]:
import chart_utils

In [None]:
# Route geometry export, read from a zip in the working directory and
# reprojected to the same CRS as the stops.
remix = (
    gpd.read_file('Amtrak_Thruway_Bus-2025-10-15-18-37-57-route_geometry.zip')
    .to_crs(CA_NAD83Albers_m)
)

In [None]:
remix.head(1)

In [None]:
# Geometry rows for line 17, each tagged with a hand-picked trip_id.
# The two-element list requires the query to return exactly two rows, and the
# ids are assigned in row order.
line_17 = (
    remix
    .query('line_name == "17 EMY-SBA Thruway"')  #  inbound is northbound
    .assign(trip_id=['205369', '205375'])
)
line_17

In [None]:
surf_st_merged.manual_route_name.unique()

In [None]:
# Stop times for the Santa Barbara - Oakland pattern, with `stop_id` copied
# into an `amtrak_stop` column.
line_17_st = (
    surf_st_merged
    .query('manual_route_name == "Santa Barbara - Oakland"')
    .assign(amtrak_stop=lambda frame: frame.stop_id)
)

In [None]:
#  charts only show one month for now
source_ridership = utils.read_format_ridership().query('trip_year == 2025 & trip_month == 4')

In [None]:
# Flow chart for the first of the two line-17 geometry rows.
chart_utils.flow_chart_from_shape_trip_row(
    line_17.iloc[0],
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
)

In [None]:
# Reload chart_utils so edits made to the module on disk take effect without
# restarting the kernel.
# NOTE(review): development aid left in the notebook; it has no effect on a
# fresh Restart & Run All and can be removed once chart_utils is stable.
import importlib
importlib.reload(chart_utils)

In [None]:
# Flow chart for the second of the two line-17 geometry rows.
chart_utils.flow_chart_from_shape_trip_row(
    line_17.iloc[1],
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
)