In [1]:
# Data-frame, geospatial and Cal-ITP warehouse/GCS helpers.
import pandas as pd
import geopandas as gpd
from calitp_data_analysis.sql import query_sql
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
from calitp_data_analysis.geography_utils import CA_NAD83Albers_m, WGS84
# Single GCS client instance reused for every parquet read/write in this notebook.
gcsgp = GCSGeoPandas()
from shared_utils import gtfs_utils_v2
from shared_utils.rt_utils import show_full_df

# Helpers and configuration for this analysis (presumably local modules — confirm).
import utils
from update_vars import ANALYSIS_DATE, shape_cols, trip_cols, stop_cols, stop_time_cols, GCS_PATH
# Lower-case alias passed to the gtfs_utils_v2 calls below.
analysis_date = ANALYSIS_DATE

# Explore Thruway Bus (and state-supported rail) data availability

In [2]:
# California boundary geometry; used further down to clip stops to the state.
ca_boundary_path = 'gs://calitp-analytics-data/data-analyses/high_quality_transit_areas/ca_boundary.parquet'
ca = gcsgp.read_parquet(ca_boundary_path)

In [3]:
# Schedule feeds with their GTFS dataset names for the analysis date; the
# per-operator lookups below filter this frame on its `name` column.
feeds = gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(analysis_date)

## New SJJPA San Joaquins feed

In [4]:
# Look up the current dim_gtfs_service_data record(s) for Amtrak San Joaquins.
# The name filter is applied in SQL instead of using `LIMIT 1000`: a LIMIT
# without ORDER BY returns an arbitrary subset of rows, so the record we are
# looking for could silently be left out of the result.
query = '''
SELECT * from cal-itp-data-infra.mart_transit_database.dim_gtfs_service_data
WHERE _is_current
AND STRPOS(name, 'Amtrak San J') > 0
'''

In [5]:
# Execute the SQL held in `query`; the result is filtered by `name` in the next cell.
df = query_sql(query)

In [6]:
# Keep only the service-data rows whose name mentions the San Joaquins feed.
san_joaquins_service_filter = 'name.str.contains("Amtrak San J")'
sanj = df.query(san_joaquins_service_filter)

In [7]:
sanj

Unnamed: 0,key,name,source_record_id,service_key,gtfs_dataset_key,customer_facing,category,fares_v2_status,manual_check__fixed_route_completeness,manual_check__demand_response_completeness,_is_current,_valid_from,_valid_to
728,c9364f7e1d226a2a1b57de51ec4375ad,Amtrak San Joaquins – Amtrak San Joaquins Sche...,recBvVy7cJzjXBjeD,b3a26733da02debeb0e23e30c8d8e928,524ea6209600e9a2de34a02cf9068729,True,primary,[Unknown],Unknown,Unknown,True,2025-08-14 00:00:00+00:00,2098-12-31 23:59:59.999999+00:00


In [8]:
# San Joaquins schedule feed row(s) for the analysis date.
# Note: this rebinds `sanj`, which earlier held the service-data lookup.
san_joaquins_feed_filter = 'name.str.contains("Amtrak San Joaquins")'
sanj = feeds.query(san_joaquins_feed_filter)
sanj

Unnamed: 0,key,date,feed_key,feed_timezone,base64_url,gtfs_dataset_key,name,regional_feed_type,type
77,2b538004ce62dd20b1c326745da7834f,2025-09-10,d9e1e77d0754b712fc608741ae3836f5,America/Los_Angeles,aHR0cHM6Ly9kMzR0aXc2NG41ejRvaC5jbG91ZGZyb250Lm...,524ea6209600e9a2de34a02cf9068729,Amtrak San Joaquins Schedule,,schedule


In [9]:
# Shapes for the San Joaquins feed; `shape_cols` is the list imported from update_vars.
sanj_shapes = gtfs_utils_v2.get_shapes(
    analysis_date,
    operator_feeds=sanj.feed_key,
    shape_cols=shape_cols,
)

AttributeError: module 'utils' has no attribute 'shape_cols'

In [None]:
# Quick size check before inspecting the San Joaquins shapes.
# (Replaces a self-assignment that had no effect.)
sanj_shapes.shape

In [None]:
sanj_shapes

In [None]:
# Trips for the San Joaquins feed; `trip_cols` is the list imported from update_vars.
sanj_trips = gtfs_utils_v2.get_trips(
    analysis_date,
    operator_feeds=sanj.feed_key,
    trip_cols=trip_cols,
)

In [None]:
# sanj_trips.query('route_type == "2"')

In [None]:
# Count of non-null trip_instance_key values per (short name, long name) route pair.
sanj_route_keys = ['route_short_name', 'route_long_name']
sanj_trips.groupby(sanj_route_keys)[['trip_instance_key']].count()

In [None]:
# Attach trip attributes to each shape, keeping one trip row per shape_array_key.
sanj_one_trip_per_shape = sanj_trips.drop_duplicates(subset=['shape_array_key'])
sanj_shapes_trip_info = sanj_shapes.merge(
    sanj_one_trip_per_shape,
    on=['shape_array_key', 'feed_key'],
)

In [None]:
# sanj_shapes_trip_info.drop(columns=['service_date']).explore(column='route_long_name')

In [None]:
# Inspect the trips published under the "Route 99" short name.
route_99_filter = 'route_short_name == "Route 99"'
sanj_trips.query(route_99_filter)

## Capitol Corridor

In [None]:
# Capitol Corridor schedule feed row(s) for the analysis date.
capitol_feed_filter = 'name.str.contains("Capitol")'
cc_feed = feeds.query(capitol_feed_filter)

In [None]:
# Shapes for the Capitol Corridor feed.
# `shape_cols` is imported from update_vars at the top of the notebook; the
# `utils` module has no `shape_cols` attribute, so `utils.shape_cols` raises
# AttributeError.
cc_shapes = gtfs_utils_v2.get_shapes(analysis_date, operator_feeds=cc_feed.feed_key, shape_cols=shape_cols)

In [None]:
# Trips for the Capitol Corridor feed.
# Uses the `trip_cols` list imported from update_vars, consistent with the
# other get_trips calls in this notebook; the column lists are not expected to
# live on `utils` (the equivalent `utils.shape_cols` lookup fails).
cc_trips = gtfs_utils_v2.get_trips(analysis_date, operator_feeds=cc_feed.feed_key, trip_cols=trip_cols)

In [None]:
# Count of non-null trip_instance_key values per (short name, long name) route pair.
cc_route_keys = ['route_short_name', 'route_long_name']
cc_trips.groupby(cc_route_keys)[['trip_instance_key']].count()

In [None]:
# Attach trip attributes to each shape, keeping one trip row per shape_array_key.
cc_one_trip_per_shape = cc_trips.drop_duplicates(subset=['shape_array_key'])
cc_shapes_trip_info = cc_shapes.merge(
    cc_one_trip_per_shape,
    on=['shape_array_key', 'feed_key'],
)

In [None]:
# cc_shapes_trip_info.drop(columns=['service_date']).explore(column='route_long_name')

## Pacific Surfliner (via Amtrak National feed)

In [10]:
# The Amtrak national schedule feed, which carries the Surfliner-associated Thruway trips.
amtrak_national_filter = 'name.str.contains("Amtrak Schedule")'
surf_feed = feeds.query(amtrak_national_filter)

In [11]:
surf_feed

Unnamed: 0,key,date,feed_key,feed_timezone,base64_url,gtfs_dataset_key,name,regional_feed_type,type
18,541a64be0d1f8379b710b36071428bd5,2025-09-10,c8afbdabdbbf7a511e299824601f4a7c,America/New_York,aHR0cHM6Ly9jb250ZW50LmFtdHJhay5jb20vY29udGVudC...,1165b1474df778cb0fc3ba9246e32035,Amtrak Schedule,,schedule


In [12]:
# Shapes from the Amtrak national feed.
surf_shapes = gtfs_utils_v2.get_shapes(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    shape_cols=shape_cols,
)

  sqlalchemy.util.warn(


In [13]:
# Stops from the Amtrak national feed (nationwide until clipped to CA below).
surf_stops = gtfs_utils_v2.get_stops(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    stop_cols=stop_cols,
)

  sqlalchemy.util.warn(


In [14]:
# Put both layers in the CA_NAD83Albers_m CRS, then keep only the stops that
# fall inside the California boundary.
ca = ca.to_crs(CA_NAD83Albers_m)
surf_stops = surf_stops.to_crs(CA_NAD83Albers_m).clip(ca)

In [15]:
# Trips from the Amtrak national feed.
surf_trips = gtfs_utils_v2.get_trips(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    trip_cols=trip_cols,
)

In [16]:
# Stop-time columns requested for the Amtrak feed.
# NOTE: this rebinds the `stop_time_cols` name imported from update_vars.
stop_time_cols = [
    'feed_key',
    'trip_id',
    'stop_id',
    'stop_sequence',
    'arrival_time',
    'departure_time',
    'arrival_sec',
    'departure_sec',
]

In [17]:
# Stop times for the Amtrak national feed, fetched as a DataFrame (get_df=True)
# for the trips in `surf_trips`.
surf_st = gtfs_utils_v2.get_stop_times(
    analysis_date,
    operator_feeds=surf_feed.feed_key,
    get_df=True,
    trip_df=surf_trips,
    stop_time_cols=stop_time_cols,
)

  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(
  sqlalchemy.util.warn(


In [18]:
# Narrow the national feed's trips down to the Thruway bus service.
thruway_filter = 'route_long_name == "Amtrak Thruway Connecting Service"'
surf_trips = surf_trips.query(thruway_filter)

In [19]:
# Thruway trips -> their stop times -> the (CA-clipped) stops they serve.
surf_trip_stop_times = surf_trips.merge(surf_st, on=['feed_key', 'trip_id'])
surf_merged = surf_trip_stop_times.merge(surf_stops, on=['feed_key', 'stop_id'])

### CA Thruway in Amtrak National Feed:

#### `route_id` 41080

* ['Chico Amtrak', 'Red Bluff Amtrak Bus Stop',
       'Redding Amtrak Bus Stop']

#### `route_id` 42896

* ['Bakersfield', 'Uc Santa Barbara Amtrak Bus Stop', 'Santa Barbara',
       'Las Vegas', 'Las Vegas Amtrak Bus Stop',
       'Barstow Amtrak Bus Stop', 'Barstow', 'Mojave Amtrak Bus Stop',
       'Tehachapi Amtrak Bus Stop', 'Fillmore Amtrak Bus Stop',
       'Santa Paula Amtrak Bus Stop', 'Oxnard', 'Ventura Amtrak',
       'Carpinteria Amtrak']

#### `route_id` 42954

* An odd one: its stops are spread nationwide, including some points in CA

#### `route_id` not that useful, look for trip patterns...



In [20]:
# For each trip, the distinct stop names it serves, stored as a frozenset so
# identical stop sets are hashable and can be counted as one pattern.
stop_names_by_trip = surf_merged.groupby('trip_id')['stop_name'].unique()
trip_id_stops = stop_names_by_trip.map(frozenset)

In [21]:
trip_id_stops

trip_id
183836    (Auburn Amtrak, Truckee Amtrak Station, Rosevi...
183852    (Auburn Amtrak, Truckee Amtrak Station, Rosevi...
183877                       (Roseville Amtrak, Sacramento)
183910    (Elk Grove Amtrak Bus Stop, Stockton, Sacramento)
183911    (Oroville Amtrak Bus Stop, Marysville Amtrak S...
                                ...                        
205408          (Bakersfield, Los Angeles, Glendale Amtrak)
205411                           (Bakersfield, Los Angeles)
205414    (Westwood Ucla Amtrak Bus Stop, Van Nuys, Burb...
205417    (Vallejo, Santa Rosa Amtrak Bus Stop, Petaluma...
205420                          (San Francisco, Emeryville)
Name: stop_name, Length: 128, dtype: object

In [22]:
# Number of trips sharing each distinct stop set; value_counts orders the
# patterns from most to least frequent, which the positional lookups below rely on.
trip_patterns = trip_id_stops.value_counts() # count unique trip patterns...

In [23]:
trip_patterns

(San Francisco, Emeryville)                                                                                                                                                                                                                                                                                               38
(Bakersfield, Los Angeles, Glendale Amtrak)                                                                                                                                                                                                                                                                                9
(San Jose, Oakland, Paso Robles Amtrak Station, Santa Barbara, Grover Beach Amtrak, Solvang Amtrak Bus Stop, King City Amtrak Bus Stop, Salinas, San Luis Obispo, San Francisco, Santa Maria Amtrak Bus Stop, Buellton Amtrak Bus Stop, Uc Santa Barbara Amtrak Bus Stop, San Luis Obispo Amtrak Bus Stop, Emeryville)     6
(Lodi Amtrak Station, Davis, Stockton, Sacramento

### Surfliner-associated Trip Patterns

https://www.pacificsurfliner.com/plan-your-trip/connections/bus-connections/

In [None]:
# Stop set used for the "Santa Barbara - Oakland" route label below.
# NOTE(review): position 2 depends on the value_counts ordering for this
# analysis date — re-check the displayed pattern whenever the data changes.
sb_oakland = trip_patterns.index[2]
sb_oakland

In [None]:
# Stop set used for the "Fullerton - Indio" route label below.
# NOTE(review): position 17 depends on the value_counts ordering for this
# analysis date — verify the pattern in the next cell before relying on it.
fullerton_indio = trip_patterns.index[17]

In [None]:
fullerton_indio

In [None]:
# Stop set used for the "Fullerton - Palm Springs" route label below.
# NOTE(review): position 21 depends on the value_counts ordering for this
# analysis date — verify the pattern in the next cell before relying on it.
fullerton_palm_springs = trip_patterns.index[21]

In [None]:
fullerton_palm_springs

In [None]:
# Hand-assigned route names, keyed by the stop-set pattern that identifies each route.
surfliner_routes_from_trip_patterns = {
    fullerton_palm_springs: 'Fullerton - Palm Springs',
    fullerton_indio: 'Fullerton - Indio',
    sb_oakland: 'Santa Barbara - Oakland',
}

In [None]:
# One row per trip with its stop-set pattern, plus the manual route name when
# the pattern is one of the Surfliner-associated ones (None otherwise).
pattern_by_trip = trip_id_stops.rename('trip_pattern').reset_index()
trips_with_pattern = pattern_by_trip.assign(
    manual_route_name=pattern_by_trip['trip_pattern'].map(surfliner_routes_from_trip_patterns.get)
)

In [None]:
# Keep only the trip_ids matched to a Surfliner-associated route.
# dropna is restricted to `manual_route_name` so that a null in any other
# column cannot silently discard a matched trip.
trips_with_pattern = trips_with_pattern.dropna(subset=['manual_route_name']) #  trip_id for Surfliner-associated routes...
trips_with_pattern

In [None]:
# Bring the trip attributes back onto the Surfliner-associated trips.
trips_with_pattern = pd.merge(trips_with_pattern, surf_trips, on='trip_id')

In [None]:
trips_with_pattern

In [None]:
# All of these trips share a single route_id, so route_id cannot be used to
# tell the Surfliner-associated services apart.
route_id_keys = ['route_long_name', 'route_id']
trips_with_pattern.groupby(route_id_keys)[['trip_instance_key']].count()

In [None]:
# Stops -> stop times -> Surfliner-associated trips (with manual route names).
surf_stop_times_at_stops = surf_stops.merge(surf_st, on=['feed_key', 'stop_id'])
surf_st_merged = surf_stop_times_at_stops.merge(trips_with_pattern, on=['feed_key', 'trip_id'])

In [None]:
# gcsgp.geo_data_frame_to_parquet(gdf, path) is used in the next cell to persist
# the merged stop times to GCS. (Interactive `?` help lookup removed: it is
# development-only introspection and not valid plain Python.)

In [None]:
# Persist the merged stop times without the frozenset `trip_pattern` column
# (presumably because it cannot be written to parquet — confirm).
surfliner_st_path = f'{GCS_PATH}intermediate/surfliner_national_st.parquet'
surf_st_for_export = surf_st_merged.drop(columns='trip_pattern')
gcsgp.geo_data_frame_to_parquet(surf_st_for_export, surfliner_st_path)

## A derived timetable of Pacific Surfliner-associated thruways in the Amtrak Nationwide feed

Daily Trips:

* 1 Indio - Fullerton
* 1 Fullerton - Indio
* 1 Fullerton - Palm Springs Airport
* 1 Palm Springs Airport - Fullerton
* 4 Oakland - Santa Barbara
* 4 Santa Barbara - Oakland

In [None]:
# show_full_df(utils.format_stop_times(surf_st_merged))

## Visualizations

In [None]:
import chart_utils

In [None]:
# Route geometry export, read from a zip in the working directory and
# reprojected to the CA_NAD83Albers_m CRS.
remix_export_zip = 'Amtrak_Thruway_Bus-2025-10-15-18-37-57-route_geometry.zip'
remix = gpd.read_file(remix_export_zip).to_crs(CA_NAD83Albers_m)

In [None]:
# Keep a copy of the route geometry in GCS alongside the other source data.
remix_gcs_path = f'{GCS_PATH}source_data/remix.parquet'
gcsgp.geo_data_frame_to_parquet(remix, remix_gcs_path)

In [None]:
remix.head(1)

In [None]:
# Route 17 geometries (inbound is northbound), each tagged with a hand-picked
# Amtrak trip_id. The two-element list assumes the filter returns exactly two rows.
line_17_filter = 'line_name == "17 EMY-SBA Thruway"'
line_17 = remix.query(line_17_filter).assign(trip_id=['205369', '205375'])
line_17

In [None]:
surf_st_merged.manual_route_name.unique()

In [None]:
# Stop times for the Santa Barbara - Oakland pattern, with stop_id copied into
# an `amtrak_stop` column.
line_17_filter_st = 'manual_route_name == "Santa Barbara - Oakland"'
line_17_st = surf_st_merged.query(line_17_filter_st).assign(
    amtrak_stop=lambda st: st.stop_id
)

In [None]:
# The charts currently cover a single month only: April 2025.
ridership_month_filter = 'trip_year == 2025 & trip_month == 4'
source_ridership = utils.read_format_ridership().query(ridership_month_filter)

In [None]:
# Flow chart for the first Route 17 geometry row.
line_17_first_row = line_17.iloc[0]
chart_utils.flow_chart_from_shape_trip_row(
    line_17_first_row,
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
)

In [None]:
# Development aid: re-import chart_utils so edits to the module take effect
# without restarting the kernel.
import importlib
importlib.reload(chart_utils)

In [None]:
# Flow chart for the second Route 17 geometry row.
line_17_second_row = line_17.iloc[1]
chart_utils.flow_chart_from_shape_trip_row(
    line_17_second_row,
    stop_times=line_17_st,
    ridership=source_ridership,
    ridership_data_route='Rt 17',
)

### Line 39 (Fullerton - Palm Springs - Indio)

In [None]:
remix.line_name.unique()

In [None]:
# "Long (IND)" pattern of Route 39: iloc 0 is WB, iloc 1 is EB. Each row is
# tagged with a hand-picked Amtrak trip_id; the two-element list assumes the
# filter returns exactly two rows.
line_39_filter = 'line_name == "Route 39 - Fullerton - Palm Springs - Indio" & pattern == "Long (IND)"'
line_39 = remix.query(line_39_filter).assign(trip_id=['201578', '201581'])
line_39

In [None]:
# Stop times for the Fullerton - Indio pattern, with stop_id copied into an
# `amtrak_stop` column.
line_39_filter_st = 'manual_route_name == "Fullerton - Indio"'
line_39_st = surf_st_merged.query(line_39_filter_st).assign(
    amtrak_stop=lambda st: st.stop_id
)

In [None]:
# Flow chart for the first Route 39 row (WB, per the note where line_39 is built).
line_39_wb_row = line_39.iloc[0]
chart_utils.flow_chart_from_shape_trip_row(
    line_39_wb_row,
    stop_times=line_39_st,
    ridership=source_ridership,
    ridership_data_route='Rt 39',
)

In [None]:
# Flow chart for the second Route 39 row (EB, per the note where line_39 is built).
line_39_eb_row = line_39.iloc[1]
chart_utils.flow_chart_from_shape_trip_row(
    line_39_eb_row,
    stop_times=line_39_st,
    ridership=source_ridership,
    ridership_data_route='Rt 39',
)