In [1]:
import os

# 10**12 bytes, i.e. 1 TB in decimal units.
# NOTE(review): presumably a BigQuery bytes-scanned cap read by the calitp tooling,
# which would explain why it is set before any project imports — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(10**12)

In [2]:
import geopandas as gpd
import pandas as pd
from siuba import *
import numpy as np

from segment_speed_utils import helpers, gtfs_schedule_wrangling
from shared_utils import rt_dates, gtfs_utils_v2
import folium
import itertools

In [3]:
# Shared analysis settings from update_vars. Each name is listed exactly once
# (AM_PEAK and PM_PEAK were previously repeated in this import).
from update_vars import (
    analysis_date,
    AM_PEAK,
    PM_PEAK,
    EXPORT_PATH,
    GCS_FILE_PATH,
    PROJECT_CRS,
    SEGMENT_BUFFER_METERS,
    HQ_TRANSIT_THRESHOLD,
    MS_TRANSIT_THRESHOLD,
)

# Why is the Coronado ferry missing from our datasets?

See https://github.com/cal-itp/data-analyses/issues/1458

In [4]:
# Display the service date (imported from update_vars) that every query below uses.
analysis_date

'2025-04-16'

In [5]:
import rail_ferry_brt_stops

In [6]:
# Run the stop-assembly step from rail_ferry_brt_stops for the analysis date.
# NOTE(review): presumably the combined rail / ferry / BRT stops, going by the module name — confirm.
stops_all = rail_ferry_brt_stops.assemble_stops(analysis_date)

In [12]:
# GTFS route_type 4 is ferry service; the column is compared as a string here.
ferry = stops_all.query('route_type == "4"')

In [16]:
# ferry  # output omitted: the Coronado ferry stops are not present in this subset!

## earlier stages: trips and stop times

In [45]:
# Scheduled trips for the analysis date, restricted to the trip identifiers and
# route attributes used to look up a single route in the cells below.
trips = helpers.import_scheduled_trips(
    analysis_date,
    get_pandas=True,
    columns=[
        "name",
        "feed_key",
        "trip_instance_key",
        "trip_id",
        "route_id",
        "route_type",
        "route_desc",
    ],
)

### present in trips

In [46]:
# Route "COR" is the route being traced; a non-empty result means its trips exist at the trips stage.
trips.query('route_id == "COR"')  # present in trips

Unnamed: 0,name,feed_key,trip_instance_key,trip_id,route_id,route_type,route_desc
8010,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,
8011,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,ae163073f5df72dd20e25eef248e85b8,15331347,COR,4,
8012,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,a9e6ebfead8f4d083d5e2c352a667af0,15331420,COR,4,
8013,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,146253f66632b630b81c049776f7e846,15331423,COR,4,
8014,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,cb617cd344351cf7e95f43dbdef8bace,15331407,COR,4,
...,...,...,...,...,...,...,...
53854,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,11f0f0eba9b338205875b0b8e988654e,15331334,COR,4,
53855,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,6e062fcd915fdede3614a932babe6dcf,15331366,COR,4,
53856,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,148b592082a0e354c0ad8b13f1621c4e,15331380,COR,4,
53857,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,d20cd9e8e3309e107374ce02f6bcffee,15331426,COR,4,


In [51]:
# Scheduled stop times requested with with_direction=True, keeping only the
# keys needed to join each stop back to its trip.
stop_times = helpers.import_scheduled_stop_times(
    analysis_date,
    get_pandas=True,
    with_direction=True,
    columns=[
        "feed_key",
        "schedule_gtfs_dataset_key",
        "stop_id",
        "trip_instance_key",
    ],
)

In [53]:
# Attach route attributes to every scheduled stop via its trip, then collapse to
# one row per distinct stop/route combination.
# Both inputs carry feed_key, so the result holds feed_key_x (from stop_times)
# and feed_key_y (from trips) because the join key is trip_instance_key only.
stops_with_route = (
    stop_times
    .merge(trips, how="inner", on="trip_instance_key")
    .drop(columns=["trip_instance_key"])
    .drop_duplicates()
    .reset_index(drop=True)
)

### not present in stop times with direction

In [54]:
# Same route filter after joining to the with-direction stop times: the result is empty.
stops_with_route.query('route_id == "COR"')  # gone: no COR rows survive the join

Unnamed: 0,feed_key_x,schedule_gtfs_dataset_key,stop_id,name,feed_key_y,trip_id,route_id,route_type,route_desc


### present in no-direction version of stop times

In [48]:
# Same loader, but with with_direction=False. The column filter used for the
# with-direction load is not applied here; the join in the next cell relies on
# trip_id, which was not part of that filter.
st_no_dir = helpers.import_scheduled_stop_times(
    analysis_date,
    get_pandas=True,
    with_direction=False,
)

In [49]:
# Join the COR trips to the no-direction stop times on feed + trip_id (inner
# join by default); a non-empty result means these stop times exist at this stage.
pd.merge(
    trips.query('route_id == "COR"'),
    st_no_dir,
    on=["feed_key", "trip_id"],
)

Unnamed: 0,name,feed_key,trip_instance_key,trip_id,route_id,route_type,route_desc,feed_timezone,base64_url,stop_id,stop_sequence,timepoint,arrival_sec,departure_sec,arrival_hour,departure_hour
0,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99440,1,1.0,72000.0,72000.0,20.0,20.0
1,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,bb077aa05b3bab38c8d87f16ec567a6f,15331358,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99439,2,1.0,72600.0,72600.0,20.0,20.0
2,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,ae163073f5df72dd20e25eef248e85b8,15331347,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99440,1,1.0,32400.0,32400.0,9.0,9.0
3,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,ae163073f5df72dd20e25eef248e85b8,15331347,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99439,2,1.0,33000.0,33000.0,9.0,9.0
4,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,a9e6ebfead8f4d083d5e2c352a667af0,15331420,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99439,1,1.0,25800.0,25800.0,7.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,148b592082a0e354c0ad8b13f1621c4e,15331380,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99441,2,1.0,67500.0,67500.0,18.0,18.0
176,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,d20cd9e8e3309e107374ce02f6bcffee,15331426,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99440,1,1.0,24600.0,24600.0,6.0,6.0
177,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,d20cd9e8e3309e107374ce02f6bcffee,15331426,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99439,2,1.0,25200.0,25200.0,7.0,7.0
178,San Diego Schedule,8510daa6c8576e648fcbd4f92ea73a51,4e21ef60e1808b9a9907dabbd20cb097,15331338,COR,4,,America/Los_Angeles,aHR0cHM6Ly93d3cuc2RtdHMuY29tL2dvb2dsZV90cmFuc2...,99440,2,1.0,52800.0,52800.0,14.0,14.0
