In [1]:
import os
# Set the CALITP_BQ_MAX_BYTES environment variable to 10**12, i.e. 1 TB in decimal units.
# NOTE(review): presumably this is a byte cap read by the calitp query helpers and
# has to be set before those packages are imported — confirm.
os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1 TB (decimal)

In [2]:
import geopandas as gpd
import pandas as pd
# NOTE(review): star import puts every public siuba name into the notebook namespace;
# none of the cells visible here appear to use one — confirm before narrowing or removing.
from siuba import *
import numpy as np

from segment_speed_utils import helpers

import sys
# Add ./scripts/ to the module search path so modules stored there can be imported below.
sys.path.append('./scripts/')

In [3]:
from update_vars import (analysis_date, EXPORT_PATH, GCS_FILE_PATH, PROJECT_CRS,
SEGMENT_BUFFER_METERS, HQ_TRANSIT_THRESHOLD, MS_TRANSIT_THRESHOLD, SHARED_STOP_THRESHOLD,
                        HALF_MILE_BUFFER_METERS)

In [4]:
import create_aggregate_stop_frequencies as casf

In [6]:
import branching_derived_intersections

In [7]:
import importlib
# Reload the two local modules so edits made to their source files since the first
# import take effect without restarting the kernel. The last reload call is the
# cell's final expression, so its returned module object is what gets displayed.
importlib.reload(casf)
importlib.reload(branching_derived_intersections)

<module 'branching_derived_intersections' from '/home/jovyan/data-analyses/high_quality_transit_areas/branching_derived_intersections.py'>

In [8]:
# !pip install calitp-data-analysis==2025.6.24
from calitp_data_analysis.gcs_geopandas import GCSGeoPandas
# Reader instance used by later cells (gcsgp.read_parquet) to load parquet files
# from the GCS paths built from GCS_FILE_PATH / EXPORT_PATH.
gcsgp = GCSGeoPandas()

In [9]:
analysis_date

'2025-06-11'

In [10]:
# Load scheduled stop_times for the analysis date as a pandas frame, then pass
# them through casf.add_route_dir.
stop_times = (
    helpers.import_scheduled_stop_times(analysis_date, get_pandas=True)
    .pipe(casf.add_route_dir, analysis_date)
)

# Prepared stop_times used as the input to the aggregation cells below.
st_prepped = casf.prep_stop_times(stop_times)

  arr, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)


## HCD Y-Branching

* combine single route frequencies with collinearity screen...
* actually, these routes need to pass _both_ a collinearity screen and a non-collinearity screen: 8+ stops shared and also 8+ stops not shared (meaningful service on all 3 legs of the Y)

In [11]:
# Aggregate the prepared stop_times per stop with single_route_dir=True.
max_arrivals_by_stop_single = casf.stop_times_aggregation_max_by_stop(
    st_prepped,
    analysis_date,
    single_route_dir=True,
)

In [12]:
# Apply get_explode_singles with the MS_TRANSIT_THRESHOLD cutoff, then expand the
# list-like 'route_dir' column so each route_dir value gets its own row.
singles_explode = (
    branching_derived_intersections
    .get_explode_singles(max_arrivals_by_stop_single, MS_TRANSIT_THRESHOLD)
    .explode('route_dir')
)

In [13]:
from tqdm import tqdm
# Register tqdm's pandas integration so `progress_apply` is available on the
# groupby used in the next cell.
tqdm.pandas()

In [14]:
# Dict passed to casf.accumulate_share_count for every (feed, stop_id) group.
# NOTE(review): presumably the function fills this dict in place with counts keyed by
# '<feed>__...' strings (the following cell splits keys on '__' and compares the
# values with SHARED_STOP_THRESHOLD) — confirm in casf. The return value of
# progress_apply itself is not assigned to anything.
share_counts = {}
singles_explode.groupby(['schedule_gtfs_dataset_key', 'stop_id']).progress_apply(casf.accumulate_share_count, share_counts=share_counts)

100%|██████████| 18698/18698 [00:02<00:00, 7094.91it/s]


In [15]:
SHARED_STOP_THRESHOLD

8

In [16]:
# Keep only the entries whose count reaches SHARED_STOP_THRESHOLD.
qualify_dict = {
    pair_key: n_shared
    for pair_key, n_shared in share_counts.items()
    if n_shared >= SHARED_STOP_THRESHOLD
}

# The text before the first '__' in each key is the feed's schedule_gtfs_dataset_key
# (it is matched against that column in a later cell); collect the distinct values.
feeds_to_filter = np.unique(
    [pair_key.split('__')[0] for pair_key in qualify_dict]
)

In [17]:
feeds_to_filter

array(['0666caf3ec1ecc96b74f4477ee4bc939',
       '3364ec074ca85001da3abd78be2ae521',
       '3c275e5acf8974e1afd765bd3011424c',
       '4b317fc27dde351e12253d46cedd8df0',
       '7cc0cb1871dfd558f11a2885c145d144',
       'c499f905e33929a641f083dad55c521e',
       'ecd018ad66f497fb8f188ed5a71b284b',
       'fb467982dcc77a7f9199bebe709bb700'], dtype='<U32')

In [18]:
# Shape geometries for the analysis date.
shapes = helpers.import_scheduled_shapes(
    analysis_date,
    columns=['shape_array_key', 'geometry'],
)

# Trips limited to the qualifying feeds, one row per feed / shape / route_dir.
trips = branching_derived_intersections.get_trips_with_route_dir(analysis_date)
trips = trips.query("schedule_gtfs_dataset_key.isin(@feeds_to_filter)")
trips = trips.drop_duplicates(
    subset=['schedule_gtfs_dataset_key', 'shape_array_key', 'route_dir']
)

# Distinct feed_key <-> schedule_gtfs_dataset_key pairs among those trips.
feeds = trips[['feed_key', 'schedule_gtfs_dataset_key']].drop_duplicates()

# Stop geometries for the analysis date.
stops = helpers.import_scheduled_stops(
    analysis_date,
    columns=['feed_key', 'stop_id', 'geometry'],
)

* dissolve shapes by route+dir, pick longest
* also try: dissolve shapes by route+dir and keep the dissolved geometry (instead of picking the longest)
* find non-overlap by previously qualifying pair

In [19]:
# Distance by which each shape is buffered before overlaying (units follow the
# shapes' CRS — presumably meters; confirm against PROJECT_CRS).
BRANCHING_OVERLAY_BUFFER = 20
# A line buffered on both sides is roughly 2 * buffer wide, so multiplying a length
# by this factor approximates the buffered area.
AREA_MULTIPLIER = 2 * BRANCHING_OVERLAY_BUFFER

# 5km per route
TARGET_METERS_DIFFERENCE = 5_000
# The same target expressed as an approximate buffered area.
TARGET_AREA_DIFFERENCE = AREA_MULTIPLIER * TARGET_METERS_DIFFERENCE

In [20]:
# Attach the trip attributes to each shape, then record each geometry's length.
# This runs before the geometry is buffered in the following cell, so the value
# is the length of the original shape geometry.
shapes = (
    shapes
    .merge(trips, on='shape_array_key')
    .assign(length=lambda merged: merged.geometry.length)
)

In [21]:
# Replace each shape's geometry with its buffer of BRANCHING_OVERLAY_BUFFER
# (distance in the units of the shapes' CRS).
shapes.geometry = shapes.buffer(BRANCHING_OVERLAY_BUFFER)
# Area of each buffered geometry, computed with the vectorized GeoSeries.area
# accessor rather than a per-row Python lambda.
shapes = shapes.assign(area = shapes.geometry.area)

In [22]:
# Largest 'length' value within each feed / route_dir group. Bracket selection is
# used so it is unambiguous that the 'length' column is meant.
max_by_route_dir = (
    shapes
    .groupby(['schedule_gtfs_dataset_key', 'route_dir'])['length']
    .max()
    .reset_index()
)

# Inner-merge back on the group keys plus the length so only shapes whose length
# equals their group's maximum remain; drop_duplicates keeps a single row when
# several shapes tie for the maximum.
shapes = shapes.merge(
    max_by_route_dir,
    on=['schedule_gtfs_dataset_key', 'route_dir', 'length'],
).drop_duplicates(
    subset=['schedule_gtfs_dataset_key', 'route_dir', 'length']
)

In [23]:
# shapes.explore(column='length')

In [24]:
feeds_to_filter

array(['0666caf3ec1ecc96b74f4477ee4bc939',
       '3364ec074ca85001da3abd78be2ae521',
       '3c275e5acf8974e1afd765bd3011424c',
       '4b317fc27dde351e12253d46cedd8df0',
       '7cc0cb1871dfd558f11a2885c145d144',
       'c499f905e33929a641f083dad55c521e',
       'ecd018ad66f497fb8f188ed5a71b284b',
       'fb467982dcc77a7f9199bebe709bb700'], dtype='<U32')

In [25]:
# Select the first qualifying feed for one-off inspection; change the index to
# look at a different entry of feeds_to_filter.
gtfs_dataset_key = feeds_to_filter[0]
gtfs_dataset_key

'0666caf3ec1ecc96b74f4477ee4bc939'

In [40]:
# unique_qualify_pairs = branching_derived_intersections.evaluate_overlaps(gtfs_dataset_key, qualify_dict, shapes, show_map=True)

## Adding stops

In [27]:
# Disabled loop that builds hcd_branching_stops feed by feed; the cell after this one
# reads a parquet of the same name from GCS instead (presumably the saved result of
# this loop — confirm).
# NOTE(review): the evaluate_overlaps call below passes only gtfs_dataset_key, while the
# commented-out call in the previous cell also passes qualify_dict and shapes — check
# the current signature before re-enabling.
# hcd_branching_stops = []
# for gtfs_dataset_key in feeds_to_filter:
#     unique_qualify_pairs = branching_derived_intersections.evaluate_overlaps(gtfs_dataset_key, show_map=False)
#     this_feed_stops = branching_derived_intersections.find_stops_this_feed(gtfs_dataset_key, max_arrivals_by_stop_single, unique_qualify_pairs)
#     hcd_branching_stops += [this_feed_stops]
# hcd_branching_stops = pd.concat(hcd_branching_stops)

In [28]:
# Load the stored branching major stops from the project's GCS folder.
hcd_branching_stops = gcsgp.read_parquet(
    f"{GCS_FILE_PATH}branching_major_stops.parquet"
)

In [29]:
hcd_branching_stops

Unnamed: 0,feed_key,stop_id,geometry,schedule_gtfs_dataset_key
0,8d9623a1823a27925b7e2f00e44fc5bb,2773,POINT (164009.800 -435271.586),0666caf3ec1ecc96b74f4477ee4bc939
1,8d9623a1823a27925b7e2f00e44fc5bb,2641,POINT (163232.234 -434098.714),0666caf3ec1ecc96b74f4477ee4bc939
2,8d9623a1823a27925b7e2f00e44fc5bb,3873,POINT (162065.780 -432375.210),0666caf3ec1ecc96b74f4477ee4bc939
3,8d9623a1823a27925b7e2f00e44fc5bb,2080,POINT (162937.850 -433693.564),0666caf3ec1ecc96b74f4477ee4bc939
4,8d9623a1823a27925b7e2f00e44fc5bb,3882,POINT (162817.596 -433529.595),0666caf3ec1ecc96b74f4477ee4bc939
...,...,...,...,...
167,98a10e85e7dfe3ff1caa761d1bc34606,65810,POINT (-160250.962 -75021.181),fb467982dcc77a7f9199bebe709bb700
168,98a10e85e7dfe3ff1caa761d1bc34606,65812,POINT (-160191.766 -75082.017),fb467982dcc77a7f9199bebe709bb700
169,98a10e85e7dfe3ff1caa761d1bc34606,65812,POINT (-160191.766 -75082.017),fb467982dcc77a7f9199bebe709bb700
170,98a10e85e7dfe3ff1caa761d1bc34606,65815,POINT (-166774.211 -68202.607),fb467982dcc77a7f9199bebe709bb700


## script integration

In [31]:
import google.auth
import intake

# Default Google credentials; the second element of the returned tuple is not needed.
credentials, _ = google.auth.default()
# Open the intake catalog(s) matching *.yml in the working directory.
catalog = intake.open_catalog("*.yml")

In [34]:
# Read the catalog's hqta_points entry, passing the credentials as the storage token.
points = catalog.hqta_points(
    geopandas_kwargs={"storage_options": {"token": credentials}},
).read()

In [37]:
# Everything in `points` except rows whose hqta_type is "hq_corridor_bus".
# NOTE(review): `mts` is reassigned from `hqts` in the webmap section further down,
# so this value is only relevant to the commented-out explore call that follows.
mts = (
    points
    .query('hqta_type != "hq_corridor_bus"')
)

In [39]:
# mts.explore(tiles='CartoDB Positron')

## webmap

In [38]:
# `GCSGeoPandas` is already imported and instantiated as `gcsgp` near the top of the
# notebook; reuse that instance rather than importing and constructing it again here.
hqts = gcsgp.read_parquet(f'{EXPORT_PATH}ca_hq_transit_stops.parquet')

In [42]:
# NOTE(review): leftover interactive help lookup (IPython `?` syntax); nothing below
# depends on it, so it can be removed before the notebook is shared.
gcsgp.read_file?

[0;31mSignature:[0m [0mgcsgp[0m[0;34m.[0m[0mread_file[0m[0;34m([0m[0mpath[0m[0;34m,[0m [0;34m*[0m[0margs[0m[0;34m,[0m [0;34m**[0m[0mkwds[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m Delegates to gpd.read_file with the file at the path specified in the GCS filesystem
[0;31mFile:[0m      /opt/conda/lib/python3.11/site-packages/calitp_data_analysis/gcs_geopandas.py
[0;31mType:[0m      method

In [38]:
hqts.columns

Index(['agency_primary', 'hqta_type', 'stop_id', 'route_id', 'hqta_details',
       'agency_secondary', 'base64_url_primary', 'base64_url_secondary',
       'org_id_primary', 'org_id_secondary', 'avg_trips_per_peak_hr',
       'geometry'],
      dtype='object')

In [44]:
# Rows of the exported stops whose hqta_type contains "major_stop", reprojected
# to PROJECT_CRS.
mts = (
    hqts
    .query('hqta_type.str.contains("major_stop")')
    .to_crs(PROJECT_CRS)
)

In [46]:
# Swap each layer's geometry for its HALF_MILE_BUFFER_METERS buffer.
# Caution: each frame is rebuilt from itself, so running this cell a second time
# buffers the already-buffered geometries again.
mts = mts.set_geometry(mts.buffer(HALF_MILE_BUFFER_METERS))
hcd_branching_stops = hcd_branching_stops.set_geometry(
    hcd_branching_stops.buffer(HALF_MILE_BUFFER_METERS)
)

In [48]:
# Same RGB tuple on every row; this layer is the one described as red in the
# combined map title below.
mts['color'] = [(230, 45, 39)] * len(mts)

In [49]:
# Same RGB tuple on every row; this layer is the one described as blue in the
# combined map title below.
hcd_branching_stops['color'] = [(24, 49, 214)] * len(hcd_branching_stops)

In [50]:
from shared_utils import webmap_utils

In [79]:
# Export the dissolved existing major-stop buffers as their own map layer and
# keep the returned state (its 'state_dict' is reused for the combined map).
current_state = webmap_utils.set_state_export(
    mts.dissolve(),
    filename='test_major_stops0702',
    color_col='color',
    map_title='Current Major Transit Stops (buffered)',
)


  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())


In [80]:
current_state

{'state_dict': {'name': 'null',
  'layers': [{'name': 'Current Major Transit Stops (buffered)',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/test_major_stops0702.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5},
    'type': ['speedmap',
     'speed_variation',
     'new_speedmap',
     'new_speed_variation',
     'hqta_areas',
     'hqta_stops',
     'state_highway_network',
     None]}],
  'lat_lon': (35.2889112842056, -119.47858852079196),
  'zoom': 13},
 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJDdXJyZW50IE1ham9yIFRyYW5zaXQgU3RvcHMgKGJ1ZmZlcmVkKSIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdGVzdGluZy90ZXN0X21ham9yX3N0b3BzMDcwMi5nZW9qc29uLmd6IiwgInByb3BlcnRpZXMiOiB7InN0cm9rZWQiOiBmYWxzZSwgImhpZ2hsaWdodF9zYXR1cmF0aW9uX211bHRpcGxpZXIiOiAwLjV9LCAidHlwZSI6IFsic3BlZWRtYXAiLCAic3BlZWRfdmFyaWF0aW9uIiwgIm5ld19zcGVlZG1hcCIsICJuZXdfc3B

In [81]:
# Export the buffered Y-branching stops as a standalone map layer.
branch_state = webmap_utils.set_state_export(
    hcd_branching_stops,
    filename='test_branching_stops0702',
    color_col='color',
    map_title='New "Y-branching" Major Transit Stops (buffered)',
)


  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())


In [82]:
branch_state

{'state_dict': {'name': 'null',
  'layers': [{'name': 'New "Y-branching" Major Transit Stops (buffered)',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/test_branching_stops0702.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5},
    'type': ['speedmap',
     'speed_variation',
     'new_speedmap',
     'new_speed_variation',
     'hqta_areas',
     'hqta_stops',
     'state_highway_network',
     None]}],
  'lat_lon': (35.543636272302784, -119.93017518848204),
  'zoom': 13},
 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJOZXcgXCJZLWJyYW5jaGluZ1wiIE1ham9yIFRyYW5zaXQgU3RvcHMgKGJ1ZmZlcmVkKSIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdGVzdGluZy90ZXN0X2JyYW5jaGluZ19zdG9wczA3MDIuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiBbInNwZWVkbWFwIiwgInNwZWVkX3ZhcmlhdGl

In [83]:
# Combined map: pass the existing-stops state as existing_state so the branching
# layer is added alongside it (the displayed result lists both layers).
# NOTE(review): this reuses the filename 'test_branching_stops0702' from the
# standalone branching export.
both_state = webmap_utils.set_state_export(
    hcd_branching_stops,
    filename='test_branching_stops0702',
    color_col='color',
    existing_state=current_state['state_dict'],
    map_title='New "Y-branching" in blue, existing in red',
)


  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())


In [84]:
both_state

{'state_dict': {'name': 'null',
  'layers': [{'name': 'Current Major Transit Stops (buffered)',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/test_major_stops0702.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5},
    'type': ['speedmap',
     'speed_variation',
     'new_speedmap',
     'new_speed_variation',
     'hqta_areas',
     'hqta_stops',
     'state_highway_network',
     None]},
   {'name': 'New "Y-branching" in blue, existing in red',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/test_branching_stops0702.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5},
    'type': ['speedmap',
     'speed_variation',
     'new_speedmap',
     'new_speed_variation',
     'hqta_areas',
     'hqta_stops',
     'state_highway_network',
     None]}],
  'lat_lon': (35.543636272302784, -119.93017518848204),
  'zoom': 13},
 'spa_link': 'https://embeddable-maps.calitp.org/?st