In [1]:
# Import the required packages and libs.
from single_intersection import TrafficEnv
import numpy as np
import os 
import matplotlib.pyplot as plt
import time
from sumo_rl import SumoEnvironment
import torch
# Show which SUMO installation this kernel resolves (prints None if SUMO_HOME is unset).
print("SUMO HOME:", os.getenv("SUMO_HOME"))

SUMO HOME: /opt/miniconda3/lib/python3.13/site-packages/sumo


## Heuristic baseline

In [None]:
import csv
import numpy as np
import time

def to_evaluate_agent(
        env=None,
        agent="heuristic",
        steps=1000,
        phase_duration=10,
        render=False,
        seed=42,
        to_save=None):
    """Roll out a baseline controller in a SUMO environment and log per-step metrics.

    Args:
        env: Environment exposing the calls used below: ``reset(seed=...)``
            returning ``(obs, info)``, ``step(action)`` returning a 5-tuple,
            ``action_space`` (with ``n`` and ``sample()``), ``render()``,
            ``close()`` and ``sumo.simulation.getTime()``. Required, despite
            the ``None`` default kept for signature compatibility.
        agent: ``"heuristic"`` cycles through the phases in order, holding each
            one for ``phase_duration`` steps; ``"random"`` samples an action
            from ``env.action_space`` at every step.
        steps: Number of ``env.step`` calls to perform.
        phase_duration: Steps the heuristic keeps a phase before advancing.
        render: If true, ``env.render()`` is called before every step.
        seed: Seed forwarded to every ``env.reset`` call.
        to_save: If truthy, the records are also written to
            ``results/<to_save>_evaluation_records.csv``.

    Returns:
        list[list]: One row per step, in the CSV column order
        ``[step, sim_time, waiting_time, queue_length, pressure, throughput,
        avg_speed, action, reward]``. Returned whether or not ``to_save`` is set.

    Raises:
        ValueError: If ``env`` is None or ``agent`` is not a supported name.

    The environment is always closed before returning, including when a step
    raises, so a failed run does not leave the simulator process behind.
    """
    if env is None:
        raise ValueError("to_evaluate_agent requires an environment instance (got env=None)")
    if agent not in ("heuristic", "random"):
        # Fail before touching the simulator instead of crashing mid-rollout
        # on an unbound `action`.
        raise ValueError(f"Unsupported agent {agent!r}; expected 'heuristic' or 'random'")

    # Column names written to the CSV; order matches the rows appended below.
    header = [
        "step",
        "sim_time",
        "avg_wait_time",
        "queue_length",
        "pressure",
        "throughput",
        "avg_speed",
        "action",
        "reward"]
    sim_records = []

    try:
        obs, info = env.reset(seed=seed)
        cur_phase, phase_timer = 0, 0

        for step in range(steps):
            if render:
                env.render()

            # --- Action selection ---
            if agent == "heuristic":
                # Fixed-cycle controller: advance to the next phase once the
                # current one has been held for `phase_duration` steps.
                if phase_timer >= phase_duration:
                    cur_phase = (cur_phase + 1) % env.action_space.n
                    phase_timer = 0
                action = cur_phase
                phase_timer += 1
            else:  # agent == "random"
                action = env.action_space.sample()

            # --- Apply the action and read back the metrics ---
            obs, reward, terminated, truncated, info = env.step(action)
            sim_time = env.sumo.simulation.getTime()

            sim_records.append([
                step,
                sim_time,
                info["waiting_time"],
                info["queue_length"],
                info["pressure"],
                info["throughput"],
                info["avg_speed"],
                action,
                reward])

            # An episode ends on termination *or* truncation; start a new one
            # so the remaining steps are still simulated.
            if terminated or truncated:
                obs, info = env.reset(seed=seed)
    finally:
        env.close()

    if to_save:
        # Written to: results/<to_save>_evaluation_records.csv
        folder = "results"
        os.makedirs(folder, exist_ok=True)

        save_path = os.path.join(folder, f"{to_save}_evaluation_records.csv")

        with open(save_path, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(header)
            writer.writerows(sim_records)

        print(f"Evaluation records saved to: {save_path}")

    return sim_records



In [None]:
# Traffic-light id handed to TrafficEnv as `tls_id`.
TLS_ID = "t"

# SUMO arguments for the single-intersection scenario.
# Add "--start" to this list when running with the GUI for visualization.
sumo_cmd = [
    "--no-warnings", "true",
    "-n", "single-intersection.net.xml",
    "-r", "single-intersection-vertical.rou.xml",
    "--step-length", "1.0",
]

# Headless environment (gui=False); pass gui=True to open the SUMO GUI.
env = TrafficEnv(sumo_cmd=sumo_cmd, tls_id=TLS_ID, gui=False)

# Short 100-step run of the fixed-cycle heuristic; records are also written
# under results/ because `to_save` is set.
logs = to_evaluate_agent(env=env, agent="heuristic", steps=100, to_save="test")

In [None]:
## Define the training function

# import torch.optim as optim
# from ppo import ActorCritic, compute_gae, collect_rollout
# from torch import nn
# from helper_func import plot_traff_metrics

# Integrate into train.py

## Training Section

### First, build the different environments

In [None]:
# Import the required packages and libs.
from single_intersection import TrafficEnv
from train import train_ppo
import torch.optim as optim
from ppo import ActorCritic, compute_gae, collect_rollout
from torch import nn
from helper_func import plot_traff_metrics


# SUMO command (headless for speed)
sumo_cmd = [
    # "--start",  # Uncomment this line while using the GUI for visualization
    "--no-warnings", "true",  # silence SUMO warning output
    "-n", "single-intersection.net.xml",
    "-r", "single-intersection-vertical.rou.xml",
    "--step-length", "1.0"
]

# Traffic-light id handed to TrafficEnv as `tls_id`.
TLS_ID = "t"

# Environment with noise
env_w_noise = TrafficEnv(
    sumo_cmd=sumo_cmd,
    tls_id=TLS_ID,
    gui=False,   # headless; set True to show the SUMO GUI
    noise=True,
    noise_sigma=1.0
)

# Check that the *noisy* environment works. This must reset `env_w_noise`,
# not `env`: the noise-free `env` is only created further down in this cell.
obs, info = env_w_noise.reset()
print(f"Environment {env_w_noise.__class__.__name__} setup complete.")

# Basic environment without noise (default)
env = TrafficEnv(
    sumo_cmd=sumo_cmd,
    tls_id=TLS_ID,
    gui=False   # headless; set True to show the SUMO GUI
)

# Check that the noise-free environment works
obs, info = env.reset()
print(f"Environment {env.__class__.__name__} setup complete. ")


In [2]:
# Run experiments across all combinations:
# 1. Noise v.s. Without Noise
# 2. Heuristic v.s. RL Agent
# 3. Different Reward Function 

from run_experiments import run_experiments

# Traffic-light id and SUMO arguments shared by every experiment.
TLS_ID = "t"
sumo_cmd = [
    "--no-warnings", "true",
    "-n", "single-intersection.net.xml",
    "-r", "single-intersection-vertical.rou.xml",
    "--step-length", "1.0",
]

# Experiment grid passed to run_experiments. Per the original notes it sweeps
# noise on/off, heuristic vs. RL agent, and each reward configuration
# (NOTE(review): confirm against run_experiments; with the single active
# reward config below, the earlier "16 experiments" figure no longer applies).
noise_options = [True, False]

# Each reward config is (name, w1, ..., w5); the weights are presumably in the
# order queue_reduction, queue_abs, pressure, switch_penalty, throughput
# -- TODO confirm against run_experiments.
# Alternative presets kept for reference:
#     ("default", 1.0, 0.3, 0.15, 0.01, 0.005),
#     ("queue_focused", 2.0, 0.5, 0.1, 0.05, 0.01),
#     ("pressure_focused", 1.0, 0.2, 0.3, 0.05, 0.01),
#     ("throughput_focused", 0.5, 0.2, 0.1, 0.05, 0.02),
reward_configs = [("default", 1.0, 0.3, 0.15, 0.05, 0.005)]

# Training settings forwarded as `train_kwargs`.
train_model_configs = dict(
    LR=3e-4,
    N_STEPS=256,
    N_EPOCHS=4,
    MINI_BATCH_SIZE=64,
    TOTAL_TIMESTEPS=4 * 4096,
)

# Launch the sweep; `eval_steps` is the number of evaluation steps per experiment.
# (`train_timesteps=50000` is intentionally left out of the call.)
results = run_experiments(
    sumo_cmd=sumo_cmd,
    tls_id=TLS_ID,
    reward_configs=reward_configs,
    noise_options=noise_options,
    train_kwargs=train_model_configs,
    eval_steps=1000,
    seed=42,
)

print(f"\nTotal experiments completed: {len(results)}")
print("Check 'results/experiments_summary.csv' for summary of all experiments")




STAGE 1: Finding best reward function and noise combination (RL agent)


Stage 1 - Experiment 1: Noise=True, Reward=default
 Retrying in 1 seconds


  num_phases = len(self.sumo.trafficlight.getCompleteRedYellowGreenDefinition(self.ts_id)[0].phases)


Training RL agent...
 Retrying in 1 seconds
Debug: Reached 0 steps
 Retrying in 1 seconds
[Training] Step 256/16384 (1.6%) | Reward: -29.495 | Waiting time: 0.157 | Queue length: 1.461 | Throughput: 0.056
 Retrying in 1 seconds
[Training] Step 512/16384 (3.1%) | Reward: -24.752 | Waiting time: 0.203 | Queue length: 1.253 | Throughput: 0.059
 Retrying in 1 seconds
[Training] Step 768/16384 (4.7%) | Reward: -31.635 | Waiting time: 0.217 | Queue length: 1.583 | Throughput: 0.066
 Retrying in 1 seconds
[Training] Step 1024/16384 (6.2%) | Reward: -35.159 | Waiting time: 0.308 | Queue length: 1.864 | Throughput: 0.047
 Retrying in 1 seconds
[Training] Step 1280/16384 (7.8%) | Reward: -32.408 | Waiting time: 0.217 | Queue length: 1.605 | Throughput: 0.059
 Retrying in 1 seconds
[Training] Step 1536/16384 (9.4%) | Reward: -30.720 | Waiting time: 0.243 | Queue length: 1.631 | Throughput: 0.054
 Retrying in 1 seconds
[Training] Step 1792/16384 (10.9%) | Reward: -30.998 | Waiting time: 0.235 | Qu

KeyboardInterrupt: 

Traceback (most recent call last):
  File [35m"/opt/miniconda3/bin/sumo"[0m, line [35m8[0m, in [35m<module>[0m
    sys.exit([31msumo[0m[1;31m()[0m)
             [31m~~~~[0m[1;31m^^[0m
  File [35m"/opt/miniconda3/lib/python3.13/site-packages/sumo/__init__.py"[0m, line [35m28[0m, in [35m<lambda>[0m
    return lambda: sys.exit([31msubprocess.call[0m[1;31m([os.path.join(SUMO_HOME, 'bin', app)] + sys.argv[1:], env=os.environ)[0m)
                            [31m~~~~~~~~~~~~~~~[0m[1;31m^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^[0m
  File [35m"/opt/miniconda3/lib/python3.13/subprocess.py"[0m, line [35m397[0m, in [35mcall[0m
    return [31mp.wait[0m[1;31m(timeout=timeout)[0m
           [31m~~~~~~[0m[1;31m^^^^^^^^^^^^^^^^^[0m
  File [35m"/opt/miniconda3/lib/python3.13/subprocess.py"[0m, line [35m1280[0m, in [35mwait[0m
    return [31mself._wait[0m[1;31m(timeout=timeout)[0m
           [31m~~~~~~~~~~[0m[1;31m^^^^^^

In [None]:
# model_params  = {
#     "obs_dim": env.observation_space.shape[0],
#     "act_dim": env.action_space.n
# }

# ppo_model = ActorCritic(**model_params)
# print("Model initialized. Starting training...")

# train_params = {
#     "model": ppo_model,
#     "env": env
# }

# model_hist = train_ppo(**train_params)
