In [1]:
import geopandas as gpd
from calitp_data_analysis import gcs_geopandas, geography_utils
from rt_analysis import signal_tools
from shared_utils import catalog_utils, rt_dates, rt_utils, webmap_utils
import branca

In [2]:
# constants
TARGET_DATE = rt_dates.DATES["jun2025"]
SIGNAL_URI = "gs://calitp-analytics-data/data-analyses/rt_delay/signals/signals_2025-09-08.geojson"
TARGET_TIME_OF_DAY = "AM Peak"
ANALYSIS_DISTRICT_NUMBER = 7
TARGET_TIME_OF_DAY_LENGTH_HOURS = (
    3  # the length of the target time of day (3 hours for am peak)
)

In [3]:
g = gcs_geopandas.GCSGeoPandas()

In [4]:
# read geo files
shared_data_catalog = catalog_utils.get_catalog("shared_data_catalog")
gtfs_data_constants = catalog_utils.get_catalog("gtfs_analytics_data")

# Get district polygons to mask
# districts = shared_data_catalog.caltrans_districts.read()
# analysis_district = districts.loc[districts["DISTRICT"] == ANALYSIS_DISTRICT_NUMBER]
analysis_district = gpd.read_file("central_westside.geojson")

# Get speedmap data
speedmap_segments = g.read_parquet(
    f"{gtfs_data_constants.speedmap_segments.dir}{gtfs_data_constants.speedmap_segments.segment_timeofday}_{TARGET_DATE}.parquet",
    # filters=[("caltrans_district", "=", f"{ANALYSIS_DISTRICT_NUMBER}")] not working for some reason, think it needs to be 07 - Something
    filters=[
        ("time_of_day", "=", TARGET_TIME_OF_DAY)
    ],  # Filter for only a selected time of day
).clip(analysis_district)
# Get signal data
signals = (
    g.read_file(
        SIGNAL_URI,  # filters=[("District", "=", f"{ANALYSIS_DISTRICT_NUMBER}")] #comment out to do statewide analysis
    )
    .rename(columns=lambda s: s.lower())
    .clip(analysis_district)
)  # we want columns to be all lower case
# TODO: filter by district

In [5]:
# Get one GDF with signals and their nearest segment
buffered_speedmap_segments = gpd.GeoDataFrame(
    data=speedmap_segments.drop(speedmap_segments.geometry.name, axis=1),
    geometry=speedmap_segments.to_crs(geography_utils.CA_NAD83Albers_m).buffer(5),
)

In [6]:
def fix_multiline_strings(geom: gpd.GeoSeries) -> gpd.GeoSeries:
    """Replace any MultiLineStrings in the input with LineStrings, keeping only the longest length"""
    # Filter out linestrings
    copy = gpd.GeoDataFrame(geometry=geom.loc[geom.geom_type == "MultiLineString"])

    # Get a unique value (in case the index is weird)
    copy["unique_value"] = 1
    copy["unique_value"] = copy["unique_value"].cumsum()

    # Explode the MultiLineStrings into their constituent linestrings
    exploded = copy.explode(copy.geometry.name, index_parts=False)

    # Pick out the line strings with the longest length
    exploded["geom_length"] = exploded.to_crs(geography_utils.CA_NAD83Albers_m).length
    exploded_sorted = exploded.sort_values(
        ["unique_value", "geom_length"], ascending=False
    )
    condensed = exploded_sorted.drop_duplicates(
        subset=["unique_value"], keep="first"
    ).drop("unique_value", axis=1)

    # Combine the new geometry with the original geometry, and return it
    return geom.where(geom.geom_type != "MultiLineString", condensed.geometry)


unique_segment_identifiers = ["segment_id", "shape_id"]

In [7]:
# Join segments to signals

sjoined_signals_segments = (
    signal_tools.sjoin_signals(
        signal_gdf=signals,
        segments_gdf=buffered_speedmap_segments,
        segments_lines_gdf=speedmap_segments,
    )
    .drop("geometry", axis=1)
    .set_geometry("line_geom")
)
sjoined_signals_segments.geometry = fix_multiline_strings(
    sjoined_signals_segments.geometry
)
# Get whether the signal is "approaching" or not
sjoined_signals_segments["approaching"] = signal_tools.determine_approaching(
    sjoined_signals_segments
)  # unsure if this works
# Get the distance from the segment to the associate signal
sjoined_signals_segments["distance_to_signal"] = (
    sjoined_signals_segments["line_geom"]
    .to_crs(geography_utils.CA_NAD83Albers_m)
    .distance(
        sjoined_signals_segments["signal_pt_geom"].to_crs(
            geography_utils.CA_NAD83Albers_m
        )
    )
)
# Get the number of scheduled vehicles per hour
sjoined_signals_segments["vehicles_per_hour_sch"] = (
    sjoined_signals_segments["n_trips_sch"] / TARGET_TIME_OF_DAY_LENGTH_HOURS
)

# Filter for only approaching vehicles
sjoined_signals_segments_approaching = sjoined_signals_segments.loc[
    sjoined_signals_segments["approaching"]
].copy()
# sjoined_signals_segments_

In [9]:
# Get signal grain data
signals_with_transit = signals.loc[
    signals["tms_unit_type"] == "Traffic Signals"
].set_index("objectid")

# Make sure we only count the one shape per signal
signals_segments_removed_duplicates = sjoined_signals_segments.sort_values(
    ["distance_to_signal"], ascending=True
).drop_duplicates(subset=["shape_id", "objectid"], keep="first")

# Groupby signal id
speedmaps_grouped_by_signal = signals_segments_removed_duplicates.groupby("objectid")

# Get frequencies through a stop
signals_with_transit["vehicles_per_hour_sch"] = speedmaps_grouped_by_signal[
    "vehicles_per_hour_sch"
].sum()
# Get all the routes that serve a stop
signals_with_transit["segment_names_counted"] = (
    speedmaps_grouped_by_signal["route_short_name"]
).agg(lambda s: (s.drop_duplicates().astype(str) + " & ").sum())

In [10]:
# Get signal-route grain data
signal_route_group = signals_segments_removed_duplicates.groupby(
    ["objectid", "route_short_name", "direction_id"]
)

signals_routes_frequency = signal_route_group["vehicles_per_hour_sch"].sum()
signals_routes_frequency.reset_index().set_index("objectid")

Unnamed: 0_level_0,route_short_name,direction_id,vehicles_per_hour_sch
objectid,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
9724,574,1.0,0.333333
9725,134,0.0,2.000000
9725,134,1.0,2.000000
9725,33,0.0,4.000000
9725,33,1.0,4.000000
...,...,...,...
13582,81,1.0,4.000000
13582,90,0.0,3.000000
13582,90,1.0,3.000000
13582,94,0.0,4.000000


In [11]:
sjoined_signals_segments.columns

Index(['schedule_gtfs_dataset_key', 'shape_id', 'shape_array_key', 'route_id',
       'direction_id', 'stop_pair', 'segment_id', 'stop_pair_name',
       'time_of_day', 'p50_mph', 'n_trips', 'p20_mph', 'p80_mph',
       'n_trips_sch', 'trips_hr_sch', 'route_short_name', 'name',
       'caltrans_district', 'organization_source_record_id_x',
       'organization_name', 'base64_url', 'imms_id_x', 'objectid', 'location',
       'imms_id_y', 'signal_pt_geom', 'line_geom',
       'organization_source_record_id_y', 'approaching', 'distance_to_signal',
       'vehicles_per_hour_sch'],
      dtype='object')

In [20]:
signals_with_transit_display_columns = [
    "tms_unit_type",
    "asset_sub_type",
    #"tms_id",
    "imms_id",
    "delegation_type",
    #"leased_owned",
    #"comment",
    "vehicles_per_hour_sch",
    "segment_names_counted",
    signals_with_transit.geometry.name,
]
sjoined_signals_segments_display_columns = [
    "p50_mph",
    "p20_mph",
    "p80_mph",
    "route_short_name",
    "organization_name",
    sjoined_signals_segments.geometry.name,
]

In [27]:
# Create webmap
SIGNAL_LEGEND_URL = 'https://storage.googleapis.com/calitp-map-tiles/signal_legend.svg'
signal_colorscale = branca.colormap.step.Purples_05.scale(vmin=0, vmax=sjoined_signals_segments["vehicles_per_hour_sch"].max())
speedmap = webmap_utils.set_state_export(
    sjoined_signals_segments_approaching.drop("signal_pt_geom", axis=1),
    subfolder="signals_v2",
    filename="speeds",
    cmap=rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE,
    color_col="p20_mph",
    cache_seconds=1,
    map_type="new_speedmap",
    legend_url=rt_utils.SPEEDMAP_LEGEND_URL,
    map_title="Speeds",
)
signal_speedmap = webmap_utils.set_state_export(
    signals_with_transit[signals_with_transit_display_columns],
    subfolder="signals_v2",
    cmap=signal_colorscale,
    color_col="vehicles_per_hour_sch",
    existing_state=speedmap['state_dict'],
    map_title=f"Signals with Approach Speeds {TARGET_DATE}",
    legend_url=SIGNAL_LEGEND_URL,
    manual_centroid = [33.699342, -117.987465]
)
signal_speedmap


  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())


{'state_dict': {'name': 'null',
  'layers': [{'name': 'Speeds',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v2speeds.geojson.gz',
    'properties': {'stroked': False,
     'highlight_saturation_multiplier': 0.5,
     'tooltip_speed_key': 'p20_mph'},
    'type': 'new_speedmap'},
   {'name': 'Signals with Approach Speeds 2025-06-11',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v2test2.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}}],
  'lat_lon': [33.699342, -117.987465],
  'zoom': 13,
  'legend_url': 'https://storage.googleapis.com/calitp-map-tiles/signal_legend.svg'},
 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJTcGVlZHMiLCAidXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3NpZ25hbHNfdjJzcGVlZHMuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41LCAidG

In [None]:
signals.loc[signals["tms_unit_type"] == "Traffic Signals"].objectid.duplicated().any()

In [None]:
sjoined_signals_segments.columns

ToDo:
- Immediate
  - Need to get definition for n_trips, and stop double counting segments at scores
    - Something like, for each shape id, we only count the n_trips_sch at the closest point- 
  - Need to figure out why determine_approaching seems not to be working (Melrose / Normandie example?)
  - Need to get frequency per line at each signal
  - Get something working that graphs, even if it's ugly
- Less immediate
  - Use route_short_name
  - Get trips per hour, rather than whatever n_trips_sch is