In [2]:
# file: envs/traffic_env.py

import gym
from gym import spaces
import numpy as np
import os
import sys
import time
import traci
from traci.exceptions import TraCIException

from integration_pems_ems_sumo import SUTrafficEnv  # the class we wrote before


class GymTrafficEnv(gym.Env):
    """
    Gym-style wrapper around SUTrafficEnv.

    Observations: continuous vector (time of day, hours_to_next_EV, avg_flow, avg_speed, avg_occ, phases...)
    Actions: discrete phase index per TLS (Discrete for single TLS, MultiDiscrete for multiple).

    Every observation returned by ``reset`` / ``step`` is clipped to the bounds of
    ``observation_space`` so it always lies inside the declared Box.
    """

    metadata = {"render_modes": ["human"], "render_fps": 10}

    def __init__(
        self,
        sumo_cfg: str,
        ems_day,
        pems_day_rl,
        meta_rl,
        tls_ids,
        use_gui: bool = False,
        sim_duration_s: int = 3600,
        max_phases: int = 4,
    ):
        """
        Build the underlying SUTrafficEnv and declare the action/observation spaces.

        Args:
            sumo_cfg: SUMO configuration file, forwarded to SUTrafficEnv.
            ems_day, pems_day_rl, meta_rl: data inputs forwarded unchanged to
                SUTrafficEnv (the notebook passes pandas DataFrames).
            tls_ids: sized sequence of traffic-light ids to control.
            use_gui: forwarded to SUTrafficEnv.
            sim_duration_s: forwarded to SUTrafficEnv.
            max_phases: assumed upper bound on the number of phases per TLS;
                sizes the action space and bounds the phase entries of the
                observation. Defaults to 4 (the previously hard-coded value).

        Raises:
            ValueError: if ``max_phases`` is smaller than 1.
        """
        super().__init__()

        if max_phases < 1:
            raise ValueError("max_phases must be >= 1")

        self.tls_ids = tls_ids
        self.num_tls = len(tls_ids)

        # underlying SUMO env
        self._env = SUTrafficEnv(
            sumo_cfg=sumo_cfg,
            ems_day=ems_day,
            pems_day_rl=pems_day_rl,
            meta_rl=meta_rl,
            tls_ids=tls_ids,
            use_gui=use_gui,
            sim_duration_s=sim_duration_s,
        )

        # ---------- ACTION SPACE ----------
        # assume each TLS has at most `max_phases` phases
        self.max_phases = max_phases
        if self.num_tls == 1:
            self.action_space = spaces.Discrete(self.max_phases)
        else:
            self.action_space = spaces.MultiDiscrete([self.max_phases] * self.num_tls)

        # ---------- OBSERVATION SPACE ----------
        # obs = [time_of_day, hours_to_next_EV, avg_flow, avg_speed, avg_occ, phases...]
        # Rough bounds:
        # time_of_day ∈ [0, 1]
        # hours_to_next_EV ∈ [0, 24] (clip)
        # avg_flow ∈ [0, 5000] (vehicles / 5 min)
        # avg_speed ∈ [0, 120] (mph)
        # avg_occ ∈ [0, 1]
        # phases ∈ [0, max_phases]
        low = np.array(
            [0.0, 0.0, 0.0, 0.0, 0.0] + [0.0] * self.num_tls,
            dtype=np.float32,
        )
        high = np.array(
            [1.0, 24.0, 5000.0, 120.0, 1.0] + [float(self.max_phases)] * self.num_tls,
            dtype=np.float32,
        )

        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

    def reset(self, *, seed=None, options=None):
        """
        Reset the underlying simulation.

        Returns:
            (obs, info): the clipped float32 observation and an empty info dict.
        """
        super().reset(seed=seed)
        obs = self._env.reset()
        return self._clip_obs(obs), {}

    def step(self, action):
        """
        Apply ``action`` and advance the underlying env by one step.

        Args:
            action: a single phase index when one TLS is controlled, otherwise
                a sequence with one phase index per TLS.

        Returns:
            (obs, reward, terminated, truncated, info) in the 5-tuple Gym API.
            ``truncated`` is always False; ``terminated`` mirrors the ``done``
            flag reported by the underlying env.
        """
        if self.num_tls == 1:
            # .item() also accepts 0-d and 1-element arrays without relying on
            # the implicit array -> scalar conversion deprecated in NumPy 1.25
            a = int(np.asarray(action).item())
        else:
            a = np.array(action, dtype=int).tolist()

        obs, reward, done, info = self._env.step(a)
        obs = self._clip_obs(obs)

        # gym step signature: obs, reward, terminated, truncated, info
        terminated = bool(done)
        truncated = False  # you can implement time-based truncation if you want

        return obs, float(reward), terminated, truncated, info

    def _clip_obs(self, obs):
        """
        Return a float32 copy of ``obs`` clipped to the observation-space bounds.

        All declared components are clipped, including the per-TLS phase
        entries. Only the leading entries that have a declared bound are
        touched, so an empty or shorter-than-expected vector does not raise and
        any extra trailing entries pass through unchanged.
        """
        obs = np.array(obs, dtype=np.float32)

        low = self.observation_space.low
        high = self.observation_space.high
        n = min(len(obs), len(low))
        obs[:n] = np.clip(obs[:n], low[:n], high[:n])

        return obs

    def render(self):
        """No-op: when SUMO was started with the GUI it is already visible."""
        pass

    def close(self):
        """Close the underlying SUTrafficEnv."""
        self._env.close()


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [9]:
import pandas as pd
import importlib
import integration_pems_ems_sumo
importlib.reload(integration_pems_ems_sumo)
import time

from integration_pems_ems_sumo import SUTrafficEnv
# NOTE(review): `sumo_cfg` is never referenced below -- the env is built with the
# literal "config.sumo.cfg", which is the path shown in the launch log. Confirm
# which path is intended before wiring this variable in.
sumo_cfg="sumo_simulation/config.sumo.cfg"

ems_rl = pd.read_csv(
    "../data/4_transformed_dataset/transformed_emergency_logs.csv",
    parse_dates=["event_time"],
)

# keep only the emergency events of the simulated day
sim_date = pd.to_datetime("2025-01-06").date()
ems_day_df = ems_rl[ems_rl["event_time"].dt.date == sim_date].copy()

meta_rl_df = pd.read_csv("../data/4_transformed_dataset/transformed_station_metadata.csv")
pems_day_rl_df = pd.read_parquet(
    "../data/4_transformed_dataset/d04_text_station_5min_2025_01_06_rl.parquet"
)

env = GymTrafficEnv(
    sumo_cfg="config.sumo.cfg",
    ems_day=ems_day_df,
    pems_day_rl=pems_day_rl_df,
    meta_rl=meta_rl_df,
    tls_ids=["10005001961"],  # or your actual TLS id
    use_gui=False,
    sim_duration_s=3600,
)

# Smoke test: drive the env with uniformly random actions until the episode ends.
# try/finally guarantees env.close() runs even if a step raises, so a failed
# run does not skip the environment's cleanup.
try:
    obs, _ = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
finally:
    env.close()


[SUTrafficEnv] Launching: sumo -c config.sumo.cfg --step-length 1.0
 Retrying in 1 seconds


  logics = traci.trafficlight.getCompleteRedYellowGreenDefinition(tls_id)


Step #500.00 (18ms ~= 55.56*RT, ~24166.67UPS, TraCI: 233ms, vehicles TOT 459 ACT 435 BUF 0



Step #800.00 (22ms ~= 45.45*RT, ~29227.27UPS, TraCI: 240ms, vehicles TOT 733 ACT 643 BUF 0



Step #1100.00 (25ms ~= 40.00*RT, ~32680.00UPS, TraCI: 236ms, vehicles TOT 1011 ACT 817 BUF



Step #1600.00 (43ms ~= 23.26*RT, ~22441.86UPS, TraCI: 318ms, vehicles TOT 1463 ACT 965 BUF



Step #1700.00 (32ms ~= 31.25*RT, ~30906.25UPS, TraCI: 251ms, vehicles TOT 1557 ACT 989 BUF



Step #1900.00 (43ms ~= 23.26*RT, ~23953.49UPS, TraCI: 250ms, vehicles TOT 1743 ACT 1030 BU



Step #2000.00 (51ms ~= 19.61*RT, ~20823.53UPS, TraCI: 366ms, vehicles TOT 1835 ACT 1062 BU



Step #2100.00 (36ms ~= 27.78*RT, ~29972.22UPS, TraCI: 264ms, vehicles TOT 1930 ACT 1079 BU



Step #2300.00 (38ms ~= 26.32*RT, ~29868.42UPS, TraCI: 268ms, vehicles TOT 2118 ACT 1135 BU



Step #2400.00 (57ms ~= 17.54*RT, ~20035.09UPS, TraCI: 267ms, vehicles TOT 2207 ACT 1142 BU



Step #2500.00 (34ms ~= 29.41*RT, ~33617.65UPS, TraCI: 247ms, vehicles TOT 2297 ACT 1143 BU



Step #2700.00 (75ms ~= 13.33*RT, ~15893.33UPS, TraCI: 248ms, vehicles TOT 2481 ACT 1192 BU



Step #2800.00 (49ms ~= 20.41*RT, ~24673.47UPS, TraCI: 258ms, vehicles TOT 2574 ACT 1209 BU



Step #2900.00 (39ms ~= 25.64*RT, ~31461.54UPS, TraCI: 248ms, vehicles TOT 2665 ACT 1227 BU



Step #3000.00 (39ms ~= 25.64*RT, ~32102.56UPS, TraCI: 248ms, vehicles TOT 2762 ACT 1252 BU



Step #3100.00 (86ms ~= 11.63*RT, ~14360.47UPS, TraCI: 447ms, vehicles TOT 2850 ACT 1235 BU



Step #3300.00 (46ms ~= 21.74*RT, ~27652.17UPS, TraCI: 266ms, vehicles TOT 3039 ACT 1272 BU



Step #3400.00 (39ms ~= 25.64*RT, ~32615.38UPS, TraCI: 240ms, vehicles TOT 3131 ACT 1272 BU



Step #3500.00 (53ms ~= 18.87*RT, ~24301.89UPS, TraCI: 293ms, vehicles TOT 3219 ACT 1288 BU



Step #3600.00 (40ms ~= 25.00*RT, ~32800.00UPS, TraCI: 266ms, vehicles TOT 3309 ACT 1312 BU


In [3]:
# file: baselines/controllers.py

import numpy as np
import traci


class FixedTimeController:
    """
    Baseline 1: fixed-time traffic signal.

    Does not look at EMS information or at the observation: all signals step
    to their next phase together once every ``phase_duration_steps`` calls to
    ``select_action``.
    """

    def __init__(self, tls_ids, phase_duration_steps=20, max_phases=4):
        self.step_counter = 0
        self.max_phases = max_phases
        self.phase_duration_steps = phase_duration_steps
        self.tls_ids = tls_ids
        # every signal starts in phase 0
        self.current_phase_idx = dict.fromkeys(tls_ids, 0)

    def select_action(self, obs=None):
        """
        Pick the phase for each TLS and advance the internal step counter.

        ``obs`` is accepted for interface compatibility and is not used.

        Returns an action compatible with GymTrafficEnv:
        - a scalar phase index when exactly one TLS is controlled
        - a list with one phase index per TLS otherwise
        """
        tick = self.step_counter

        # rotate on every multiple of the phase duration, except on the very first call
        rotate = tick % self.phase_duration_steps == 0 and tick > 0
        if rotate:
            phases = self.current_phase_idx
            for tid in self.tls_ids:
                phases[tid] = (phases[tid] + 1) % self.max_phases

        self.step_counter = tick + 1

        single = len(self.tls_ids) == 1
        if single:
            return self.current_phase_idx[self.tls_ids[0]]
        return [self.current_phase_idx[tid] for tid in self.tls_ids]


class GreedyEVPreemptionController:
    """
    Baseline 2: simple emergency preemption.
    If any EV is detected on an incoming edge for this TLS, switch/hold green toward that EV.
    Otherwise behaves like a fixed-time controller.

    An EV is any vehicle whose id starts with ``ev_prefix``.
    """

    def __init__(
        self,
        tls_ids,
        ev_prefix="EV_",
        phase_duration_steps=20,
        max_phases=4,
        tls_phase_map=None,
    ):
        """
        Args:
            tls_ids: sequence of traffic-light ids to control.
            ev_prefix: vehicle-id prefix that marks an emergency vehicle.
            phase_duration_steps: number of ``select_action`` calls between
                fixed-time phase changes.
            max_phases: number of phases the fixed-time cycle wraps around.
            tls_phase_map: optional dict mapping
                tls_id -> {phase_index: [incoming_edge_ids_for_that_phase]}
                A TLS without an entry (or with only empty edge lists) is never
                preempted and simply follows the fixed-time cycle.
        """
        self.tls_ids = tls_ids
        self.ev_prefix = ev_prefix
        self.phase_duration_steps = phase_duration_steps
        self.max_phases = max_phases
        self.tls_phase_map = tls_phase_map or {}

        self.current_phase_idx = {tls_id: 0 for tls_id in tls_ids}
        self.step_counter = 0

    def _current_ev_edges(self):
        """Query TraCI once and return the set of edge ids EVs are currently on."""
        return {
            traci.vehicle.getRoadID(vid)
            for vid in traci.vehicle.getIDList()
            if vid.startswith(self.ev_prefix)
        }

    def _ev_present_for_phase(self, tls_id, phase_idx, ev_edges=None):
        """
        Check if an EV exists on any incoming edge mapped to this phase.

        Args:
            tls_id: traffic-light id to look up in ``tls_phase_map``.
            phase_idx: phase whose incoming edges are checked.
            ev_edges: optional pre-fetched collection of edge ids occupied by
                EVs; when omitted, TraCI is queried.

        Returns:
            True if at least one EV edge is among the phase's incoming edges.
            False when the phase has no mapped edges (TraCI is not queried).
        """
        phase_map = self.tls_phase_map.get(tls_id, {})
        incoming_edges = phase_map.get(phase_idx, [])

        if not incoming_edges:
            return False

        if ev_edges is None:
            ev_edges = self._current_ev_edges()

        return any(edge_id in incoming_edges for edge_id in ev_edges)

    def select_action(self, obs=None):
        """
        Returns action compatible with GymTrafficEnv
        (scalar for one TLS, list of phase indices otherwise).

        ``obs`` is accepted for interface compatibility and is not used.
        """
        actions = []
        # EV positions are fetched lazily and at most once per call: the
        # simulation does not advance while an action is being chosen, so one
        # snapshot serves every phase of every TLS.
        ev_edges = None

        for tls_id in self.tls_ids:
            # 1) Check if any phase has an EV present; if so, preempt to that phase
            #    (the first matching phase in map order wins)
            preempt_phase = None
            phase_map = self.tls_phase_map.get(tls_id, {})

            for phase_idx, incoming_edges in phase_map.items():
                if not incoming_edges:
                    continue
                if ev_edges is None:
                    ev_edges = self._current_ev_edges()
                if self._ev_present_for_phase(tls_id, phase_idx, ev_edges):
                    preempt_phase = phase_idx
                    break

            if preempt_phase is not None:
                self.current_phase_idx[tls_id] = preempt_phase
            else:
                # 2) No EV: fixed-time cycling
                if self.step_counter % self.phase_duration_steps == 0 and self.step_counter > 0:
                    self.current_phase_idx[tls_id] = (
                        self.current_phase_idx[tls_id] + 1
                    ) % self.max_phases

            actions.append(self.current_phase_idx[tls_id])

        self.step_counter += 1

        if len(self.tls_ids) == 1:
            return actions[0]
        return actions


In [5]:
import os
import pandas as pd

ems_rl = pd.read_csv(
    "../data/4_transformed_dataset/transformed_emergency_logs.csv",
    parse_dates=["event_time"],
)

# keep only the emergency events of the simulated day
sim_date = pd.to_datetime("2025-01-06").date()

ems_day_df = ems_rl[ems_rl["event_time"].dt.date == sim_date].copy()

meta_rl_df = pd.read_csv("../data/4_transformed_dataset/transformed_station_metadata.csv")

pems_day_rl_df = pd.read_parquet(
    "../data/4_transformed_dataset/d04_text_station_5min_2025_01_06_rl.parquet"
)

# one list of controlled signals shared by the env and the controller, so the
# two cannot disagree about which (or how many) TLS are being driven
TLS_IDS = ["10005001961"]

# build env (same as before)
env = GymTrafficEnv(
    sumo_cfg="config.sumo.cfg",
    ems_day=ems_day_df,
    pems_day_rl=pems_day_rl_df,
    meta_rl=meta_rl_df,
    tls_ids=TLS_IDS,
    use_gui=False,
    sim_duration_s=3600
)

# Baseline 1: fixed-time (cycle length taken from the env's action space size)
fixed_agent = FixedTimeController(
    tls_ids=TLS_IDS, phase_duration_steps=20, max_phases=env.max_phases
)

total_reward_fixed = 0.0

# try/finally guarantees env.close() runs even if a step raises
try:
    obs, _ = env.reset()
    done = False

    while not done:
        action = fixed_agent.select_action(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        total_reward_fixed += reward
finally:
    env.close()

print("Fixed-time total reward:", total_reward_fixed)


[SUTrafficEnv] Launching: sumo -c config.sumo.cfg --step-length 1.0
 Retrying in 1 seconds


  logics = traci.trafficlight.getCompleteRedYellowGreenDefinition(tls_id)


Step #500.00 (15ms ~= 66.67*RT, ~29000.00UPS, TraCI: 243ms, vehicles TOT 459 ACT 435 BUF 0



Step #800.00 (21ms ~= 47.62*RT, ~30571.43UPS, TraCI: 254ms, vehicles TOT 733 ACT 642 BUF 0



Step #1100.00 (39ms ~= 25.64*RT, ~20897.44UPS, TraCI: 321ms, vehicles TOT 1011 ACT 815 BUF



Step #1400.00 (32ms ~= 31.25*RT, ~28437.50UPS, TraCI: 253ms, vehicles TOT 1277 ACT 910 BUF



Step #1700.00 (35ms ~= 28.57*RT, ~28057.14UPS, TraCI: 256ms, vehicles TOT 1557 ACT 982 BUF



Step #2000.00 (34ms ~= 29.41*RT, ~31088.24UPS, TraCI: 271ms, vehicles TOT 1834 ACT 1057 BU



Step #2100.00 (49ms ~= 20.41*RT, ~22061.22UPS, TraCI: 390ms, vehicles TOT 1930 ACT 1081 BU



Step #2300.00 (40ms ~= 25.00*RT, ~28350.00UPS, TraCI: 272ms, vehicles TOT 2118 ACT 1134 BU



Step #2400.00 (45ms ~= 22.22*RT, ~25155.56UPS, TraCI: 308ms, vehicles TOT 2207 ACT 1132 BU



Step #2500.00 (41ms ~= 24.39*RT, ~27975.61UPS, TraCI: 254ms, vehicles TOT 2297 ACT 1147 BU



Step #2700.00 (65ms ~= 15.38*RT, ~18400.00UPS, TraCI: 273ms, vehicles TOT 2481 ACT 1196 BU



Step #2800.00 (43ms ~= 23.26*RT, ~28232.56UPS, TraCI: 260ms, vehicles TOT 2574 ACT 1214 BU



Step #3000.00 (42ms ~= 23.81*RT, ~29476.19UPS, TraCI: 263ms, vehicles TOT 2762 ACT 1238 BU



Step #3100.00 (40ms ~= 25.00*RT, ~30725.00UPS, TraCI: 282ms, vehicles TOT 2850 ACT 1229 BU



Step #3400.00 (53ms ~= 18.87*RT, ~24132.08UPS, TraCI: 292ms, vehicles TOT 3131 ACT 1279 BU



Step #3500.00 (36ms ~= 27.78*RT, ~35861.11UPS, TraCI: 254ms, vehicles TOT 3219 ACT 1291 BU



Step #3600.00 (31ms ~= 32.26*RT, ~42419.35UPS, TraCI: 279ms, vehicles TOT 3309 ACT 1315 BU
Fixed-time total reward: -35048461.0
