#### Introduction

This notebook creates maps and tables to determine the transit service at or near traffic signals. Currently, only signals on the State Highway Network (SHN) are shown.

#### How to use
On the calitp Jupyterhub, import `shared_utils` and authenticate to GCS.

#### Technical details
- Signal data is sourced from HQ Traffic Ops GIS, downloaded on 2025-09-08.
Segment data is calculated based on GTFS data collected by DDS for the selected targed date
- Routes are associated with a signal if they run within 155 meters of the signal. 
- Speeds and frequencies are defined based on data in the morning peak (7a-10a). Displayed speeds are the 20th percentile speeds.
- Network lines are based on data provided by Rebel. They are manually drawn, and do not strictly represent transit routes.
- Detailed speeds methodology is available at https://analysis.dds.dot.ca.gov/rt/README.html

In [1]:
from signal_tools import sjoin_signals
import branca
import geopandas as gpd
from calitp_data_analysis import gcs_geopandas, geography_utils
from shared_utils import catalog_utils, rt_dates, rt_utils, webmap_utils

In [2]:
# constants
TARGET_DATE = rt_dates.DATES["jul2025"]
SIGNAL_URI = "gs://calitp-analytics-data/data-analyses/rt_delay/signals/signals_2025-09-08.geojson"
TARGET_TIME_OF_DAY = "AM Peak"
ANALYSIS_DISTRICT_NUMBER = 7
TARGET_TIME_OF_DAY_LENGTH_HOURS = (
    3  # the length of the target time of day (3 hours for am peak)
)

In [3]:
g = gcs_geopandas.GCSGeoPandas()

In [4]:
# read geo files
shared_data_catalog = catalog_utils.get_catalog("shared_data_catalog")
gtfs_data_constants = catalog_utils.get_catalog("gtfs_analytics_data")

# Get district polygons to mask
districts = shared_data_catalog.caltrans_districts.read()
analysis_district = districts.loc[districts["DISTRICT"] == ANALYSIS_DISTRICT_NUMBER]

# Get speedmap data
speedmap_segments = g.read_parquet(
    f"{gtfs_data_constants.speedmap_segments.dir}{gtfs_data_constants.speedmap_segments.segment_timeofday}_{TARGET_DATE}.parquet",
    filters=[
        ("time_of_day", "=", TARGET_TIME_OF_DAY)
    ],  # Filter for only a selected time of day
).clip(analysis_district)

# Get signal data
signals = (
    g.read_file(
        SIGNAL_URI,
    )
    .rename(columns=lambda s: s.lower())
    .clip(analysis_district)
)  # we want columns to be all lower case
# Filter out devices that aren't actually signals
signals_with_transit = signals.loc[
    signals["tms_unit_type"] == "Traffic Signals"
].set_index("objectid")

In [5]:
# Get one GDF with signals and their nearest segment
buffered_speedmap_segments = gpd.GeoDataFrame(
    data=speedmap_segments.drop(speedmap_segments.geometry.name, axis=1),
    geometry=speedmap_segments.to_crs(geography_utils.CA_NAD83Albers_m).buffer(5),
)

In [6]:
# Join segments to signals
sjoined_signals_segments = (
    sjoin_signals(
        signal_gdf=signals_with_transit.reset_index(),
        segments_gdf=buffered_speedmap_segments,
        segments_lines_gdf=speedmap_segments,
    )
    .drop("geometry", axis=1)
    .set_geometry("line_geom")
)

# Get the distance from the segment to the associate signal
sjoined_signals_segments["distance_to_signal"] = (
    sjoined_signals_segments["line_geom"]
    .to_crs(geography_utils.CA_NAD83Albers_m)
    .distance(
        sjoined_signals_segments["signal_pt_geom"].to_crs(
            geography_utils.CA_NAD83Albers_m
        )
    )
)

In [7]:
# Make sure we only count the one shape per signal
signals_segments_removed_duplicates = sjoined_signals_segments.sort_values(
    ["distance_to_signal"], ascending=True
).drop_duplicates(subset=["shape_id", "objectid"], keep="first")

# Groupby signal id
speedmaps_grouped_by_signal = signals_segments_removed_duplicates.groupby("objectid")

# Get frequencies through a stop
signals_with_transit["trips_hr_sch"] = speedmaps_grouped_by_signal["trips_hr_sch"].sum()
# Get all the routes that serve a stop
agg_names = lambda names: ", ".join(names.drop_duplicates().dropna())
signals_with_transit["route_names_aggregated"] = (
    speedmaps_grouped_by_signal["route_short_name"]
).agg(agg_names)
signals_with_transit["organization_names_aggregated"] = (
    speedmaps_grouped_by_signal["organization_name"]
).agg(agg_names)

In [8]:
# Get GDFs formatted for display on the webmap

# Segment GDF
arrowized_gdf = (
    sjoined_signals_segments.drop(["signal_pt_geom"], axis=1)
    .to_crs(geography_utils.CA_NAD83Albers_m)
    .rename_geometry("geometry")
)
arrowized_gdf.geometry = arrowized_gdf.geometry.apply(rt_utils.try_parallel)
arrowized_gdf = arrowized_gdf.apply(
    rt_utils.arrowize_by_frequency, axis=1, frequency_col="trips_hr_sch"
)
arrowized_gdf["route_short_name"] = (
    arrowized_gdf["organization_name"] + " - " + arrowized_gdf["route_short_name"]
)

# Signal GDF
buffered_signals = gpd.GeoDataFrame(
    data=signals_with_transit.rename(
        columns={"trips_hr_sch": "Trips/Hour"}
    ),
    geometry=signals_with_transit.to_crs(geography_utils.CA_NAD83Albers_m).buffer(50),
).reset_index()

In [9]:
# Define columns to include
signals_with_transit_display_columns = [
    "tms_unit_type",
    "route_names_aggregated",
    "organization_names_aggregated",
    "Trips/Hour",
    # "asset_sub_type",
    "tms_id",
    "imms_id",
    "delegation_type",
    # "leased_owned",
    # "comment",
    signals_with_transit.geometry.name,
]
arrowized_segments_display_columns = [
    "trips_hr_sch",
    "p50_mph",
    "p20_mph",
    "p80_mph",
    "route_short_name",
    "stop_pair_name",
    "segment_id",
    "route_id",
    "shape_id",
    arrowized_gdf.geometry.name,
]

In [10]:
# Get study corridors to add onto the map
study_corridors = gpd.read_file("study_corridors_lines.geojson")
study_corridors["bus_lane"] = study_corridors["bus_lane"].fillna(0)
study_corridors.geometry = study_corridors.to_crs(
    geography_utils.CA_NAD83Albers_m
).buffer(100, cap_style="flat")
# Get colormap based on whether the analysis segment is a bus lane
DDS_GREY = "#d9d9d6"
DDS_BLUE = "#b1e4e3"
study_corridor_colormap = branca.colormap.StepColormap(
    colors=[DDS_GREY, DDS_BLUE], index=[0, 1]
)

In [11]:
# Create webmap
SIGNAL_LEGEND_URL= "https://storage.googleapis.com/calitp-map-tiles/signal_legend.svg"
signal_colorscale = branca.colormap.step.Purples_05.scale(
    vmin=0, vmax=sjoined_signals_segments["trips_hr_sch"].max()
)
signal_folder = "signals_v12_31/"
# Study corridors
study_corridor_map = webmap_utils.set_state_export(
    study_corridors,
    subfolder=signal_folder,
    filename="study_corridors",
    cmap=study_corridor_colormap,
    color_col="bus_lane",
    map_title="Study Corridors",
)
# Speeds
speedmap = webmap_utils.set_state_export(
    arrowized_gdf[arrowized_segments_display_columns],
    subfolder=signal_folder,
    filename="speeds",
    cmap=rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE,
    color_col="p20_mph",
    cache_seconds=1,
    map_type="new_speedmap",
    legend_url=rt_utils.SPEEDMAP_LEGEND_URL,
    map_title="Speeds",
    existing_state=study_corridor_map["state_dict"],
)
# Signals
signal_speedmap = webmap_utils.set_state_export(
    buffered_signals[signals_with_transit_display_columns],
    subfolder=signal_folder,
    cmap=signal_colorscale,
    color_col="Trips/Hour",
    existing_state=speedmap["state_dict"],
    map_title=f"Signals with Approach Speeds {TARGET_DATE}",
    # legend_url=SIGNAL_LEGEND_URL,
    manual_centroid=[34.048108, -118.4183252],
)
signal_speedmap


  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())

  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())


{'state_dict': {'name': 'null',
  'layers': [{'name': 'Study Corridors',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v12_31/study_corridors.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}},
   {'name': 'Speeds',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v12_31/speeds.geojson.gz',
    'properties': {'stroked': False,
     'highlight_saturation_multiplier': 0.5,
     'tooltip_speed_key': 'p20_mph'},
    'type': 'new_speedmap'},
   {'name': 'Signals with Approach Speeds 2025-07-16',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v12_31/test2.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}}],
  'lat_lon': [34.048108, -118.4183252],
  'zoom': 13,
  'legend_url': 'https://storage.googleapis.com/calitp-map-tiles/speeds_legend.svg'},
 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJTd

In [12]:
# Get signal-route grain data
signal_route_group = signals_segments_removed_duplicates.groupby(
    ["objectid", "route_short_name", "organization_name", "direction_id"]
)

signals_routes_frequency = signal_route_group["trips_hr_sch"].sum()
merged_signals_routes_frequency = signals_routes_frequency.reset_index().merge(
    signals_with_transit[
        [
            "tms_unit_type",
            "asset_sub_type",
            "location",
            "tms_id",
            "imms_id",
            "delegation_type",
            "leased_owned",
            "comment",
            "geometry",
        ]
    ],
    how="left",
    left_on="objectid",
    right_index=True,
    validate="many_to_one",
)
merged_geometry = gpd.GeoSeries(
    merged_signals_routes_frequency["geometry"]
).to_crs(geography_utils.WGS84)
merged_signals_routes_frequency["latitude"] = merged_geometry.y.round(5)
merged_signals_routes_frequency["longitude"] = merged_geometry.x.round(5)
merged_signals_routes_frequency.drop("geometry", axis=1).to_csv(
    "signals_routes.csv", index=False
)

In [13]:
buffered_signals.head()["Trips/Hour"]

0    10.667
1     8.667
2    12.667
3    11.667
4    11.667
Name: Trips/Hour, dtype: float64

In [14]:
# Save signal-grain data to a csv
signals_with_transit["latitude"] = signals_with_transit.geometry.y.round(5)
signals_with_transit["longitude"] = signals_with_transit.geometry.x.round(5)
signals_with_transit.rename(columns={"trips_hr_sch": "Trips/Hour"})[
    [*signals_with_transit_display_columns, "latitude", "longitude", "comment"]
].drop(signals_with_transit.geometry.name, axis=1).to_csv("signals_aggregated.csv")

In [15]:
signals_with_transit["latitude"]

objectid
11083    33.75381
13559    33.75611
11084    33.75812
11085    33.76049
11086    33.76733
           ...   
11903    34.56537
11902    34.57279
11901    34.57573
11899    34.58013
11900    34.58019
Name: latitude, Length: 1281, dtype: float64