In [1]:
import os

# Set the byte limit to 10**12 bytes, i.e. 1 TB (decimal), for this session.
# NOTE(review): presumably consumed by the calitp BigQuery tooling — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(10 ** 12)

In [2]:
import geopandas as gpd
import pandas as pd
from siuba import *
import numpy as np

from segment_speed_utils import helpers, gtfs_schedule_wrangling
from shared_utils import rt_dates, gtfs_utils_v2
import folium
import itertools

In [3]:
# Project-level settings shared across this analysis.
# Each name is listed once (AM_PEAK / PM_PEAK were previously repeated).
from update_vars import (
    analysis_date, AM_PEAK, PM_PEAK, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
    SEGMENT_BUFFER_METERS, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD,
)

# Why is the Coronado ferry missing from our datasets?

See https://github.com/cal-itp/data-analyses/issues/1458

In [4]:
# Date string imported from update_vars; it is passed to the loader calls in this notebook.
analysis_date

'2025-04-16'

In [5]:
import rail_ferry_brt_stops

In [103]:
import importlib
# Re-execute the already-imported module so that edits made to
# rail_ferry_brt_stops.py take effect without restarting the kernel.
importlib.reload(rail_ferry_brt_stops)

<module 'rail_ferry_brt_stops' from '/home/jovyan/data-analyses/high_quality_transit_areas/rail_ferry_brt_stops.py'>

In [104]:
# Assemble the stops table for the analysis date using rail_ferry_brt_stops.
stops_all = rail_ferry_brt_stops.assemble_stops(analysis_date)

In [105]:
# Keep only rows whose route_type is the string "4" (GTFS route_type 4 = ferry).
ferry = stops_all[stops_all["route_type"] == "4"]

In [106]:
ferry  # Coronado ferry stops (route_id "COR") are now present

Unnamed: 0,feed_key,stop_id,stop_name,geometry,name,route_id,route_type,route_desc
446,8510daa6c8576e648fcbd4f92ea73a51,99440,Broadway Pier,POINT (265337.654 -584519.841),San Diego Schedule,COR,4,
710,8510daa6c8576e648fcbd4f92ea73a51,99439,Coronado Ferry Landing,POINT (265739.278 -586372.329),San Diego Schedule,COR,4,
14520,1dc19b0c7c704e7663492625b4c1acb1,1,Treasure Island,POINT (-208524.906 -19590.082),Bay Area 511 Treasure Island Ferry Schedule,TISF,4,
14521,1dc19b0c7c704e7663492625b4c1acb1,2,San Francisco,POINT (-210370.936 -21759.606),Bay Area 511 Treasure Island Ferry Schedule,TISF,4,
21077,8510daa6c8576e648fcbd4f92ea73a51,99441,5th Av Pier,POINT (266488.380 -585759.477),San Diego Schedule,COR,4,
81877,7bc8ee29bf0fd457367c7ae3dcdcb2c1,72011,San Francisco Ferry Building Gate E,POINT (-210291.852 -21944.196),Bay Area 511 San Francisco Bay Ferry Schedule,VJO,4,
81878,7bc8ee29bf0fd457367c7ae3dcdcb2c1,72011,San Francisco Ferry Building Gate E,POINT (-210291.852 -21944.196),Bay Area 511 San Francisco Bay Ferry Schedule,RCH,4,
81879,7bc8ee29bf0fd457367c7ae3dcdcb2c1,72011,San Francisco Ferry Building Gate E,POINT (-210291.852 -21944.196),Bay Area 511 San Francisco Bay Ferry Schedule,OA,4,
81880,7bc8ee29bf0fd457367c7ae3dcdcb2c1,72011,San Francisco Ferry Building Gate E,POINT (-210291.852 -21944.196),Bay Area 511 San Francisco Bay Ferry Schedule,HB,4,
81881,7bc8ee29bf0fd457367c7ae3dcdcb2c1,72011,San Francisco Ferry Building Gate E,POINT (-210291.852 -21944.196),Bay Area 511 San Francisco Bay Ferry Schedule,SEA,4,


## earlier stages

In [69]:
# Load scheduled trips for the analysis date as a pandas frame.
# trip_instance_key and feed_key + trip_id are the join keys used against
# stop_times later on; shape_id is kept so trips lacking a shape can be found.
trips = helpers.import_scheduled_trips(
    analysis_date,
    columns = [
        "name", "feed_key",
        "trip_instance_key", "trip_id", 
        "route_id", "route_type", "route_desc",
        "shape_id"
    ],
    get_pandas = True
)

### present in trips

In [73]:
# Route "COR" is present in the trips table.
trips[trips["route_id"] == "COR"].head(3)

Unnamed: 0,name,feed_key,trip_instance_key,trip_id,route_id,route_type,route_desc,shape_id
8010,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,,
8011,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,ae163073f5df72dd20e25eef248e85b8,15331347,COR,4,,
8012,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,a9e6ebfead8f4d083d5e2c352a667af0,15331420,COR,4,,


In [51]:
# Load scheduled stop_times using the with_direction = True variant of the loader.
# Per the notes in this notebook, that variant only covers trips that have a shape_id.
stop_times = helpers.import_scheduled_stop_times(
    analysis_date,
    columns = ["feed_key", "schedule_gtfs_dataset_key",
               "stop_id", "trip_instance_key"],
    with_direction = True,
    get_pandas = True
)

In [53]:
# Attach route attributes to every scheduled stop through its trip, then drop
# the trip key and keep only the distinct rows that remain.
# Both inputs carry feed_key, so the result holds feed_key_x / feed_key_y.
stops_with_route = (
    stop_times
    .merge(trips, on="trip_instance_key", how="inner")
    .drop(columns="trip_instance_key")
    .drop_duplicates()
    .reset_index(drop=True)
)

### not present in stop times with direction

In [54]:
stops_with_route.query('route_id == "COR"')  # empty: no COR rows after merging with directional stop_times

Unnamed: 0,feed_key_x,schedule_gtfs_dataset_key,stop_id,name,feed_key_y,trip_id,route_id,route_type,route_desc


In [80]:
# Merged stop/route table for reference (the rendered output is truncated).
stops_with_route

Unnamed: 0,feed_key_x,schedule_gtfs_dataset_key,stop_id,name,feed_key_y,trip_id,route_id,route_type,route_desc
0,35702a19aac0ed4d2a616627483d3850,c499f905e33929a641f083dad55c521e,51828,Bay Area 511 AC Transit Schedule,35702a19aac0ed4d2a616627483d3850,7316020,40,3,
1,35702a19aac0ed4d2a616627483d3850,c499f905e33929a641f083dad55c521e,52728,Bay Area 511 AC Transit Schedule,35702a19aac0ed4d2a616627483d3850,7316020,40,3,
2,35702a19aac0ed4d2a616627483d3850,c499f905e33929a641f083dad55c521e,58888,Bay Area 511 AC Transit Schedule,35702a19aac0ed4d2a616627483d3850,7316020,40,3,
3,35702a19aac0ed4d2a616627483d3850,c499f905e33929a641f083dad55c521e,52643,Bay Area 511 AC Transit Schedule,35702a19aac0ed4d2a616627483d3850,7316020,40,3,
4,35702a19aac0ed4d2a616627483d3850,c499f905e33929a641f083dad55c521e,55655,Bay Area 511 AC Transit Schedule,35702a19aac0ed4d2a616627483d3850,7316020,40,3,
...,...,...,...,...,...,...,...,...,...
3718320,ebbfc358ebe559a96332a9f2aae30691,a37760dde6b9fdcb76b82e57afab7274,3ef13f76-3f2a-4948-9f3f-0d9b66c96b16,Flixbus Schedule,ebbfc358ebe559a96332a9f2aae30691,US1200-1689-0535042025-PGH6#DEN6-00,US1200,3,
3718321,ebbfc358ebe559a96332a9f2aae30691,a37760dde6b9fdcb76b82e57afab7274,6265b7a1-9e86-4265-b5ac-ca4ebec89803,Flixbus Schedule,ebbfc358ebe559a96332a9f2aae30691,US1200-1689-0535042025-PGH6#DEN6-00,US1200,3,
3718322,ebbfc358ebe559a96332a9f2aae30691,a37760dde6b9fdcb76b82e57afab7274,0c8d1ecb-4f3d-4577-9473-0eb1a4331e37,Flixbus Schedule,ebbfc358ebe559a96332a9f2aae30691,US1200-1689-0535042025-PGH6#DEN6-00,US1200,3,
3718323,ebbfc358ebe559a96332a9f2aae30691,a37760dde6b9fdcb76b82e57afab7274,1cded49c-028d-41ae-bafa-72e82976c01b,Flixbus Schedule,ebbfc358ebe559a96332a9f2aae30691,US1200-1689-0535042025-PGH6#DEN6-00,US1200,3,


## present in no-direction version of stop times

In [90]:
# Same loader, this time with with_direction = False.
# Only feed_key, trip_id and stop_id are requested, so joins to trips use
# feed_key + trip_id instead of trip_instance_key.
st_no_dir = helpers.import_scheduled_stop_times(
    analysis_date,
    columns = ["feed_key", "trip_id",
               "stop_id"],
    with_direction = False,
    get_pandas = True
)

In [91]:
# Join COR trips to the no-direction stop_times; matching rows show the
# Coronado stops exist in this version of the table.
(
    trips[trips["route_id"] == "COR"]
    .merge(st_no_dir, on=["feed_key", "trip_id"])
    .head(3)
)

Unnamed: 0,name,feed_key,trip_instance_key,trip_id,route_id,route_type,route_desc,shape_id,stop_id
0,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,,,99440
1,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,,,99439
2,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,ae163073f5df72dd20e25eef248e85b8,15331347,COR,4,,,99440


## check `../gtfs_funnel/stop_times_with_direction.py`

* The script itself is not buggy, but it only produces directional stop times for trips that have a `shape_id`.
* The Coronado Ferry trips have no `shape_id`, so they are not included in the directional stop times.

In [55]:
import sys

# Make the sibling gtfs_funnel directory importable (path is relative to the
# notebook's working directory). Guarded so that re-running this cell does not
# keep appending duplicate entries to sys.path.
if '../gtfs_funnel' not in sys.path:
    sys.path.append('../gtfs_funnel')

In [59]:
import stop_times_with_direction as st_dir

In [60]:
# Run the prep step from stop_times_with_direction for the same analysis date.
st_prepped = st_dir.prep_scheduled_stop_times(analysis_date)

In [71]:
# Join COR trips to the prepped stop times; matching rows mean the Coronado
# stops are still present at this stage.
(
    trips[trips["route_id"] == "COR"]
    .merge(st_prepped, on=["trip_instance_key"])
    .head(3)
)

Unnamed: 0,name,feed_key_x,trip_instance_key,trip_id,route_id,route_type,route_desc,shape_id,feed_key_y,stop_id,stop_sequence,schedule_gtfs_dataset_key,shape_array_key,stop_name,geometry
0,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,,,8510daa6c8576e648fcbd4f92ea73a51,99439,2,baeeb157e85a901e47b828ef9fe75091,,Coronado Ferry Landing,POINT (265739.278 -586372.329)
1,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,,,8510daa6c8576e648fcbd4f92ea73a51,99440,1,baeeb157e85a901e47b828ef9fe75091,,Broadway Pier,POINT (265337.654 -584519.841)
2,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,ae163073f5df72dd20e25eef248e85b8,15331347,COR,4,,,8510daa6c8576e648fcbd4f92ea73a51,99439,2,baeeb157e85a901e47b828ef9fe75091,,Coronado Ferry Landing,POINT (265739.278 -586372.329)


## patch in trips without shapes (for rail/ferry/brt major stops only)

In [76]:
# Location of the rail/BRT/ferry stops parquet under GCS_FILE_PATH.
# NOTE(review): no separator is inserted, so this assumes GCS_FILE_PATH ends with "/" — confirm.
path = f"{GCS_FILE_PATH}rail_brt_ferry.parquet"

In [78]:
gpd.read_parquet(path).head(3)  # peek at the final output format (column layout to compare against)

Unnamed: 0,schedule_gtfs_dataset_key_primary,stop_id,stop_name,route_id,route_type,hqta_type,geometry
0,0666caf3ec1ecc96b74f4477ee4bc939,10846,Harbor Transitway / Rosecrans,910-13191,3,major_stop_brt,POINT (158458.764 -455601.969)
1,0666caf3ec1ecc96b74f4477ee4bc939,10848,Harbor Transitway / 37th St / USC,910-13191,3,major_stop_brt,POINT (158829.095 -442750.031)
2,0666caf3ec1ecc96b74f4477ee4bc939,10853,Harbor Transitway / Manchester,910-13191,3,major_stop_brt,POINT (158878.770 -449203.463)


In [79]:
# Assembled stops table, shown for comparison (the rendered output is truncated).
stops_all

Unnamed: 0,feed_key,stop_id,stop_name,geometry,schedule_gtfs_dataset_key,name,route_id,route_type,route_desc
0,8510daa6c8576e648fcbd4f92ea73a51,11656,10th Av & Broadway,POINT (267002.895 -584476.763),baeeb157e85a901e47b828ef9fe75091,San Diego Schedule,20,3,
1,8510daa6c8576e648fcbd4f92ea73a51,11656,10th Av & Broadway,POINT (267002.895 -584476.763),baeeb157e85a901e47b828ef9fe75091,San Diego Schedule,5,3,
2,8510daa6c8576e648fcbd4f92ea73a51,11654,10th Av & C St,POINT (266998.525 -584371.849),baeeb157e85a901e47b828ef9fe75091,San Diego Schedule,20,3,
3,8510daa6c8576e648fcbd4f92ea73a51,11654,10th Av & C St,POINT (266998.525 -584371.849),baeeb157e85a901e47b828ef9fe75091,San Diego Schedule,923,3,
4,8510daa6c8576e648fcbd4f92ea73a51,12050,10th Av & Island Av,POINT (267027.818 -585112.387),baeeb157e85a901e47b828ef9fe75091,San Diego Schedule,929,3,
...,...,...,...,...,...,...,...,...,...
128448,0cf6d1334269149e8532ced948b97e9e,22256,Silo Terminal & Center Island (WB),POINT (-152625.917 59524.930),d2b09fbd392b28d767c28ea26529b0cd,Unitrans Schedule,VX,3,
128449,0cf6d1334269149e8532ced948b97e9e,22256,Silo Terminal & Center Island (WB),POINT (-152625.917 59524.930),d2b09fbd392b28d767c28ea26529b0cd,Unitrans Schedule,J,3,
128450,0cf6d1334269149e8532ced948b97e9e,22256,Silo Terminal & Center Island (WB),POINT (-152625.917 59524.930),d2b09fbd392b28d767c28ea26529b0cd,Unitrans Schedule,C,3,
128451,0cf6d1334269149e8532ced948b97e9e,22256,Silo Terminal & Center Island (WB),POINT (-152625.917 59524.930),d2b09fbd392b28d767c28ea26529b0cd,Unitrans Schedule,V,3,


In [85]:
# Columns taken from the no-direction stop_times: the feed_key + trip_id join keys plus stop_id.
st_no_dir_cols = ["feed_key", "stop_id", "trip_id"]

In [89]:
# Stop-time rows whose trip has no shape_id (these include the COR ferry trips).
(
    st_no_dir[st_no_dir_cols]
    .merge(trips, on=["feed_key", "trip_id"])
    .loc[lambda merged: merged["shape_id"].isna()]
)

Unnamed: 0,feed_key,stop_id,trip_id,name,trip_instance_key,route_id,route_type,route_desc,shape_id
912220,ebbfc358ebe559a96332a9f2aae30691,10b1b8a5-044e-4e36-8541-f9c2fdb284a0,US0802-6840-1230042025-LAD6#SFD6-00,Flixbus Schedule,3b6d5dd03ce438317845a6d185c196b3,US0802,3,,
912221,ebbfc358ebe559a96332a9f2aae30691,299697c9-e75a-404f-94cb-eb17041354d8,US0802-6840-1230042025-LAD6#SFD6-00,Flixbus Schedule,3b6d5dd03ce438317845a6d185c196b3,US0802,3,,
912222,ebbfc358ebe559a96332a9f2aae30691,7d2e6c99-dc26-4de7-9095-f675fcf55339,US0802-6840-1230042025-LAD6#SFD6-00,Flixbus Schedule,3b6d5dd03ce438317845a6d185c196b3,US0802,3,,
912223,ebbfc358ebe559a96332a9f2aae30691,a2fca845-6058-424e-8d57-ed947fd59ee8,US0802-6840-1230042025-LAD6#SFD6-00,Flixbus Schedule,3b6d5dd03ce438317845a6d185c196b3,US0802,3,,
912224,ebbfc358ebe559a96332a9f2aae30691,cd9e4a67-2944-4b76-b3e2-be39b27ed825,US0802-6840-1230042025-LAD6#SFD6-00,Flixbus Schedule,3b6d5dd03ce438317845a6d185c196b3,US0802,3,,
...,...,...,...,...,...,...,...,...,...
4946796,8f0a1790ae94bd56041b2c21dea660b5,EMY,138871,Amtrak Schedule,d0430240b4f95505e491c2b208535930,42954,3,,
4946867,8510daa6c8576e648fcbd4f92ea73a51,99439,15331381,San Diego Schedule,edddfc7d22b3333a3c7a5064daafb17a,COR,4,,
4946868,8510daa6c8576e648fcbd4f92ea73a51,99441,15331381,San Diego Schedule,edddfc7d22b3333a3c7a5064daafb17a,COR,4,,
4946910,8510daa6c8576e648fcbd4f92ea73a51,99441,15331390,San Diego Schedule,063d619243363b3ac1bc2155275e6517,COR,4,,
