#### Introduction

This notebook creates maps and tables to determine the transit service at or near traffic signals. Currently, only signals on the State Highway Network (SHN) are shown.

#### How to use
On the calitp Jupyterhub, import `shared_utils` and authenticate to GCS.

#### Technical details
- Signal data is sourced from HQ Traffic Ops GIS, downloaded on 2025-09-08.
Segment data is calculated based on GTFS data collected by DDS for the selected targed date
- Routes are associated with a signal if they run within 155 meters of the signal. 
- Speeds and frequencies are defined based on data in the morning peak (7a-10a). Displayed speeds are the 20th percentile speeds.
- Network lines are based on data provided by Rebel. They are manually drawn, and do not strictly represent transit routes.
- Detailed speeds methodology is available at https://analysis.dds.dot.ca.gov/rt/README.html

#### Outputs
- Signal Map
  - Segments
    - 20th percentile speeds
    - Average frequencies
  - Signals
    - Unit Type
    - Aggregated data from 155 m to the signal
    - Route names (from `route_short_name`)
    - Organization names (from `organization_name`)
    - Total trips per hour, in both directions
    - IMMS ID
    - TMS ID
    - Delegation Type (describes whether a signal is owned/operated/maintaned by Caltrans or a local gov)
- Signal-Grain CSV
  - `signals_aggregated.csv`
    - Same data as in the interactive map
    - Additionally, the ArcGIS objectid is present to serve as a unique identifier
  - `signals_routes.csv`
    - One row for each route in each direction within 155 meters of each signal
    - Objectid
    - Route name
    - Organization name
    - Trips per hour in the chosen direction
    - TMS id
    - IMMS id
    - Delegation Type
    - Leased / Owned (whether the equipment is leased or owned by Caltrans)
    - Traffic Ops Comments
    
    
    

In [1]:
import branca
import geopandas as gpd
import pandas as pd
from calitp_data_analysis import gcs_geopandas, geography_utils
from shared_utils import catalog_utils, rt_dates, rt_utils, webmap_utils
from signal_tools import (
    add_transit_metrics_to_signals,
    filter_points_along_corridor,
    ready_signals_for_display,
    ready_speedmap_segments_for_display,
    sjoin_signals,
)
from uris import (
    CALTRANS_INTERNAL_SIGNAL_URI,
    LOS_ANGELES_OPEN_SIGNAL_URI,
    SANTA_MONICA_OPEN_SIGNAL_URI,
)

In [2]:
# constants
TARGET_DATE = rt_dates.DATES["jul2025"]
TARGET_TIME_OF_DAY = "AM Peak"
ANALYSIS_DISTRICT_NUMBER = 7
TARGET_TIME_OF_DAY_LENGTH_HOURS = (
    3  # the length of the target time of day (3 hours for am peak)
)
SJOIN_DISTANCE_METERS = 155
STUDY_CORRIDOR_BUFFER_DISTANCE = 200
SIGNAL_OWNER_NAME = "signal_owner"

STATE_OF_CALIFORNIA_LABEL = "State of California"
CITY_OF_LOS_ANGELES_LABEL = "City of Los Angeles"
CITY_OF_SANTA_MONICA_LABEL = "City of Santa Monica"

In [3]:
g = gcs_geopandas.GCSGeoPandas()

### Get analysis corridors

In [4]:
study_corridors = gpd.read_file("study_corridors_lines.geojson")
study_corridors_albers = study_corridors.to_crs(geography_utils.CA_NAD83Albers_m)
study_corridors_buffered_wgs84 = study_corridors_albers.buffer(
    STUDY_CORRIDOR_BUFFER_DISTANCE
).to_crs(geography_utils.WGS84)

### Get Signal Data

In [5]:
# Get signal data from Caltrans Traffic Ops Data
caltrans_signals = (
    g.read_file(
        CALTRANS_INTERNAL_SIGNAL_URI,
    )
    .rename(columns=lambda s: s.lower())
    .clip(study_corridors_buffered_wgs84)
    # .clip(analysis_district)
)  # we want columns to be all lower case
# Filter out devices that aren't actually caltrans_signals
caltrans_signals_filtered = caltrans_signals.loc[
    caltrans_signals["tms_unit_type"] == "Traffic Signals"
].set_index("objectid")
caltrans_signals_filtered[SIGNAL_OWNER_NAME] = STATE_OF_CALIFORNIA_LABEL

In [6]:
# Get signal data from LADOT Open Data
ladot_signals = (
    g.read_file(LOS_ANGELES_OPEN_SIGNAL_URI)
    .to_crs(geography_utils.WGS84)
    .clip(study_corridors_buffered_wgs84)
)
ladot_signals_filtered = ladot_signals.loc[
    ladot_signals["ST3"] != "(SMART XWALK)"
].copy()
ladot_signals_filtered[SIGNAL_OWNER_NAME] = CITY_OF_LOS_ANGELES_LABEL

# Filter LADOT signals to avoid double counting signals with Caltrans signals
caltrans_signals_buffered = caltrans_signals_filtered.set_geometry(
    caltrans_signals_filtered.to_crs(geography_utils.CA_NAD83Albers_m)
    .buffer(
        100
    )  # TODO: Not sure if there's a good value for this buffer, might need to do manual work here
    .to_crs(geography_utils.WGS84)
)
ladot_signals_distinct_from_caltrans = ladot_signals_filtered.overlay(
    caltrans_signals_buffered, how="difference"
)

In [7]:
# Get signal data from Santa Monica Open Data
santa_monica_signals = g.read_file(SANTA_MONICA_OPEN_SIGNAL_URI).clip(
    study_corridors_buffered_wgs84
)
santa_monica_signals_albers = santa_monica_signals.to_crs(
    geography_utils.CA_NAD83Albers_m
).reset_index(drop=True)
santa_monica_signals_filtered = santa_monica_signals_albers.loc[santa_monica_signals_albers["circuit_nu"] != -9]
santa_monica_signals_clustered_by_circuit = santa_monica_signals_filtered.dissolve(
    "circuit_nu"
)
santa_monica_signals_duplicates_removed = (
    santa_monica_signals_clustered_by_circuit[
        [santa_monica_signals_filtered.geometry.name]
    ]
    .set_geometry(santa_monica_signals_clustered_by_circuit.geometry.centroid)
    .reset_index()
)
santa_monica_signals_duplicates_removed[SIGNAL_OWNER_NAME] = CITY_OF_SANTA_MONICA_LABEL

### Get Speedmaps

In [8]:
# read geo files
shared_data_catalog = catalog_utils.get_catalog("shared_data_catalog")
gtfs_data_constants = catalog_utils.get_catalog("gtfs_analytics_data")

# Get district polygons to mask
districts = shared_data_catalog.caltrans_districts.read()
analysis_district = districts.loc[districts["DISTRICT"] == ANALYSIS_DISTRICT_NUMBER]

In [9]:
# Get speedmap data
speedmap_segments = g.read_parquet(
    f"{gtfs_data_constants.speedmap_segments.dir}{gtfs_data_constants.speedmap_segments.segment_timeofday}_{TARGET_DATE}.parquet",
    filters=[
        ("time_of_day", "=", TARGET_TIME_OF_DAY)
    ],  # Filter for only a selected time of day
).clip(analysis_district)

### Merge signal and speedmap info

In [10]:
caltrans_sjoined_signals_segments = sjoin_signals(
    signals_gdf=caltrans_signals_filtered.reset_index(),
    segments_lines_gdf=speedmap_segments,
    # keep_columns_signals=["objectid", "imms_id"],
    # keep_columns_segments=["shape_id", "segment_id", "organization_name", "route_id"],
    unique_identifier_signals="objectid",
    signals_buffer_distance=SJOIN_DISTANCE_METERS,
)
caltrans_signals_with_transit_metrics = add_transit_metrics_to_signals(
    signals_gdf=caltrans_signals_filtered.reset_index(),
    sjoined_signals_segments=caltrans_sjoined_signals_segments,
    unique_identifier_signals="objectid",
)

ladot_sjoined_signals_segments = sjoin_signals(
    signals_gdf=ladot_signals_distinct_from_caltrans,
    segments_lines_gdf=speedmap_segments,
    unique_identifier_signals="ID",
    signals_buffer_distance=SJOIN_DISTANCE_METERS,
)
ladot_signals_with_transit_metrics = add_transit_metrics_to_signals(
    signals_gdf=ladot_signals_distinct_from_caltrans,
    sjoined_signals_segments=ladot_sjoined_signals_segments,
    unique_identifier_signals="ID",
)

santa_monica_sjoined_signals_segments = sjoin_signals(
    signals_gdf=santa_monica_signals_duplicates_removed,
    segments_lines_gdf=speedmap_segments,
    unique_identifier_signals="circuit_nu",
    signals_buffer_distance=SJOIN_DISTANCE_METERS,
)
santa_monica_signals_with_transit_metrics = add_transit_metrics_to_signals(
    signals_gdf=santa_monica_signals_duplicates_removed,
    sjoined_signals_segments=santa_monica_sjoined_signals_segments,
    unique_identifier_signals="circuit_nu",
)

### Concatenate segments

In [11]:
# Define columns to include
arrowized_segments_display_columns = [
    "trips_hr_sch",
    "p50_mph",
    "p20_mph",
    "p80_mph",
    "route_short_name",
    "stop_pair_name",
    "segment_id",
    "route_id",
    "shape_id",
]
speedmaps_signals_together = gpd.GeoDataFrame(
    pd.concat(
        [
            caltrans_sjoined_signals_segments,
            ladot_sjoined_signals_segments,
            santa_monica_sjoined_signals_segments,
        ],
    ).drop_duplicates(subset=arrowized_segments_display_columns)
)

### Get GDFs formatted for display on the webmap

In [12]:
transit_info_columns = [
    "route_names_aggregated",
    "organization_names_aggregated",
    "Trips/Hour",
    "signal_owner",
    "color_key"
]
caltrans_signals_with_transit_display_columns = [
    "tms_unit_type",
    *transit_info_columns,
    # "asset_sub_type",
    "tms_id",
    "imms_id",
    "delegation_type",
    # "leased_owned",
    # "comment",
    caltrans_signals_with_transit_metrics.geometry.name,
]
ladot_signals_with_transit_display_columns = [
    "ID",
    *transit_info_columns,
    ladot_signals_with_transit_metrics.geometry.name,
]
santa_monica_signals_with_transit_display_columns = [
    "circuit_nu",
    *transit_info_columns,
    santa_monica_signals_with_transit_metrics.geometry.name,
]

In [13]:
# Segment GDF
arrowized_gdf = ready_speedmap_segments_for_display(speedmaps_signals_together)
# Signal GDF
ownership_map = {
    STATE_OF_CALIFORNIA_LABEL: 0,
    CITY_OF_LOS_ANGELES_LABEL: 1,
    CITY_OF_SANTA_MONICA_LABEL: 2
}
signal_size = 50
caltrans_buffered_signals = ready_signals_for_display(
    caltrans_signals_with_transit_metrics, buffer_distance=signal_size, ownership_map=ownership_map
)
ladot_buffered_signals = ready_signals_for_display(
    ladot_signals_with_transit_metrics, buffer_distance=signal_size, ownership_map=ownership_map
)
santa_monica_buffered_signals = ready_signals_for_display(
    santa_monica_signals_with_transit_metrics, buffer_distance=signal_size, ownership_map=ownership_map
)

In [14]:
# Get study corridors to add onto the map
study_corridors_for_display = study_corridors_albers.set_geometry(
    study_corridors_albers.buffer(100, cap_style="flat")
)
study_corridors_for_display["bus_lane"] = study_corridors["bus_lane"].fillna(0)
# Get colormap based on whether the analysis segment is a bus lane
DDS_GREY = "#d9d9d6"
DDS_BLUE = "#b1e4e3"
study_corridor_colormap = branca.colormap.StepColormap(
    colors=[DDS_GREY, DDS_BLUE], index=[0, 1]
)

In [15]:
# Signals colormap
DDS_MIDDLE_PURPLE = "#7474c1"
DDS_MIDDLE_ORANGE = "#ff8200"
DDS_MIDDLE_BLUE = "#00a3e0"
signal_colorscale = branca.colormap.StepColormap(
    colors=[DDS_MIDDLE_PURPLE, DDS_MIDDLE_ORANGE, DDS_MIDDLE_BLUE],
    vmin=0, vmax=2
)

In [16]:
signal_colorscale(2)

'#00a3e0ff'

### Create webmap

In [17]:
SIGNAL_LEGEND_URL = "https://storage.googleapis.com/calitp-map-tiles/signal_legend.svg"
signal_folder = "signals_v12_46/"

# Study corridors
study_corridor_map = webmap_utils.set_state_export(
    study_corridors_for_display,
    subfolder=signal_folder,
    filename="study_corridors",
    cmap=study_corridor_colormap,
    color_col="bus_lane",
    map_title="Study Corridors",
)

# Speeds
speedmap = webmap_utils.set_state_export(
    arrowized_gdf[[*arrowized_segments_display_columns, arrowized_gdf.geometry.name]],
    subfolder=signal_folder,
    filename="speeds",
    cmap=rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE,
    color_col="p20_mph",
    cache_seconds=1,
    map_type="new_speedmap",
    # legend_url=rt_utils.SPEEDMAP_LEGEND_URL,
    map_title="Speeds",
    existing_state=study_corridor_map["state_dict"],
)

# Signals
# Caltrans
caltrans_signal_speedmap = webmap_utils.set_state_export(
    caltrans_buffered_signals[caltrans_signals_with_transit_display_columns],
    subfolder=signal_folder,
    cmap=signal_colorscale,
    color_col="color_key",
    existing_state=speedmap["state_dict"],
    map_title="ct_signals",
    filename="ct_signals"
    # legend_url=SIGNAL_LEGEND_URL,
    # manual_centroid=[34.048108, -118.4183252],
)
ladot_signal_speedmap = webmap_utils.set_state_export(
    ladot_buffered_signals[ladot_signals_with_transit_display_columns],
    subfolder=signal_folder,
    cmap=signal_colorscale,
    color_col="color_key",
    existing_state=caltrans_signal_speedmap["state_dict"],
    map_title="ladot_signals",
    filename="ladot_signals"
    # legend_url=SIGNAL_LEGEND_URL,
    # manual_centroid=[34.048108, -118.4183252],
)
santa_monica_signal_speedmap = webmap_utils.set_state_export(
    santa_monica_buffered_signals[santa_monica_signals_with_transit_display_columns],
    subfolder=signal_folder,
    cmap=signal_colorscale,
    color_col="color_key",
    existing_state=ladot_signal_speedmap["state_dict"],
    map_title=f"Signals with Approach Speeds {TARGET_DATE}",
    filename="santa_monica_signals",
    # legend_url=SIGNAL_LEGEND_URL,
    manual_centroid=[34.048108, -118.4183252],
)
santa_monica_signal_speedmap


  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())

  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())

  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())

  centroid = (gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean())


{'state_dict': {'name': 'null',
  'layers': [{'name': 'Study Corridors',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v12_46/study_corridors.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}},
   {'name': 'Speeds',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v12_46/speeds.geojson.gz',
    'properties': {'stroked': False,
     'highlight_saturation_multiplier': 0.5,
     'tooltip_speed_key': 'p20_mph'},
    'type': 'new_speedmap'},
   {'name': 'ct_signals',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v12_46/ct_signals.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}},
   {'name': 'ladot_signals',
    'url': 'https://storage.googleapis.com/calitp-map-tiles/signals_v12_46/ladot_signals.geojson.gz',
    'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5}},
   {'name': 'Signals with Approach Speeds 2025-07-16',
    'url'

In [18]:
ladot_buffered_signals[SIGNAL_OWNER_NAME]

0      City of Los Angeles
1      City of Los Angeles
2      City of Los Angeles
3      City of Los Angeles
4      City of Los Angeles
              ...         
189    City of Los Angeles
190    City of Los Angeles
191    City of Los Angeles
192    City of Los Angeles
193    City of Los Angeles
Name: signal_owner, Length: 194, dtype: object

### Get tabular data

In [21]:
# Get signal-route grain data
signal_route_group = signals_segments_removed_duplicates.groupby(
    ["objectid", "route_short_name", "organization_name", "direction_id"]
)

signals_routes_frequency = signal_route_group["trips_hr_sch"].sum()
merged_signals_routes_frequency = signals_routes_frequency.reset_index().merge(
    caltrans_signals_filtered[
        [
            "tms_unit_type",
            "asset_sub_type",
            "location",
            "tms_id",
            "imms_id",
            "delegation_type",
            "leased_owned",
            "comment",
            "geometry",
        ]
    ],
    how="left",
    left_on="objectid",
    right_index=True,
    validate="many_to_one",
)
merged_geometry = gpd.GeoSeries(merged_signals_routes_frequency["geometry"]).to_crs(
    geography_utils.WGS84
)
merged_signals_routes_frequency["latitude"] = merged_geometry.y.round(5)
merged_signals_routes_frequency["longitude"] = merged_geometry.x.round(5)
merged_signals_routes_frequency.drop("geometry", axis=1).to_csv(
    "signals_routes.csv", index=False
)

NameError: name 'signals_segments_removed_duplicates' is not defined

In [None]:
buffered_signals.head()["Trips/Hour"]

In [None]:
# Save signal-grain data to a csv
caltrans_signals_filtered["latitude"] = caltrans_signals_filtered.geometry.y.round(5)
caltrans_signals_filtered["longitude"] = caltrans_signals_filtered.geometry.x.round(5)
caltrans_signals_filtered.rename(columns={"trips_hr_sch": "Trips/Hour"})[
    [*signals_with_transit_display_columns, "latitude", "longitude", "comment"]
].drop(caltrans_signals_filtered.geometry.name, axis=1).to_csv("signals_aggregated.csv")

In [None]:
caltrans_signals_filtered["latitude"]