# RL Agent traffic light control (SUMO + TraCI + PyTorch)

This notebook trains and evaluates an **RL agent** for traffic control using a simple neural network.

## Algorithm Overview

The RL Agent learning approach:
1. **Brain (Neural Network)**: A small PyTorch network that takes the queue state (a single input in the code below) and outputs action probabilities (2 actions)
2. **State**: Number of halting (queued) vehicles on the lanes controlled by traffic light `E1`, normalized by 50
3. **Action**: Binary choice - phase 0 (N-S green) or phase 2 (E-W green)
4. **Reward**: Negative of the halting-vehicle (queue) count on the lanes controlled by `E1` (shorter queue = higher reward)
5. **Safety**: Yellow phase enforced between transitions

## Workflow
1. **Training Phase**: Learn policy from trial-and-error over multiple episodes
2. **Evaluation Phase**: Test trained agent on fresh scenario
3. **KPI Analysis**: Report metrics compared to baseline

In [None]:
import torch

# ==========================================
# DEVICE SELECTION (CPU/GPU)
# ==========================================
# Check if CUDA (NVIDIA GPU) is available
# Pick the torch device for the rest of the notebook. Without CUDA the CPU is
# the only option; with CUDA the user is asked interactively.
if not torch.cuda.is_available():
    print("CUDA is not available. Using CPU.")
    device = torch.device('cpu')
    print("\n✓ Using CPU")
else:
    print("CUDA is available!")
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"Number of GPUs: {torch.cuda.device_count()}")

    # Only an explicit "y" (any case, surrounding blanks ignored) selects the GPU
    use_gpu = input("\nDo you want to use GPU for training? (y/n): ").strip().lower()
    if use_gpu != 'y':
        device = torch.device('cpu')
        print("\n✓ Using CPU")
    else:
        device = torch.device('cuda')
        print(f"\n✓ Using GPU: {torch.cuda.get_device_name(0)}")

print(f"\nSelected device: {device}")
print("=" * 70)

In [None]:
import os
import sys
import torch
import torch.nn as nn
import torch.optim as optim
import time
import subprocess
import socket

# ==========================================
# 1. NEURAL NETWORK BRAIN
# ==========================================
class TrafficBrain(nn.Module):
    """Simple 2-layer neural network for traffic light control.

    The network maps a state vector of ``input_size`` features to a
    probability distribution over ``output_size`` actions (softmax output).
    """

    def __init__(self, input_size=4, hidden_size=32, output_size=2, device='cpu'):
        """Build the two linear layers.

        Args:
            input_size: Number of state features.
            hidden_size: Width of the hidden layer.
            output_size: Number of actions.
            device: Stored on the instance only; the parameters are NOT moved
                here, callers still have to call ``.to(device)``.
        """
        super().__init__()
        self.device = device
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Return action probabilities (softmax over the last dimension)."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return torch.softmax(x, dim=-1)

    def save(self, filepath):
        """Write the state dict to ``filepath``, creating parent folders.

        A bare filename (no directory component) is saved into the current
        working directory; ``os.makedirs("")`` would raise, so the directory
        is only created when there is one.
        """
        directory = os.path.dirname(filepath)
        if directory:
            os.makedirs(directory, exist_ok=True)
        torch.save(self.state_dict(), filepath)
        print(f"Model saved to {filepath}")

    def load(self, filepath, device='cpu'):
        """Load a state dict from ``filepath`` if the file exists.

        Args:
            filepath: Path of a file written by :meth:`save`.
            device: ``map_location`` used while deserializing the tensors.

        Returns:
            True when the weights were loaded, False when the file is missing.
        """
        if not os.path.exists(filepath):
            return False
        self.load_state_dict(torch.load(filepath, map_location=device))
        print(f"Model loaded from {filepath}")
        return True


# ==========================================
# 2. CONFIGURATION (RL TRAINING)
# ==========================================
# All paths are derived from the directory the notebook is started in.
BASE_DIR = os.getcwd()
SCENARIO_DIR = os.path.join(BASE_DIR, "Network with RL control")
RESULTS_DIR = os.path.join(SCENARIO_DIR, "results")
CONFIG_PATH = os.path.join(SCENARIO_DIR, "ff_heterogeneous.sumocfg")
MODEL_DIR = os.path.join(SCENARIO_DIR, "models")
MODEL_PATH = os.path.join(MODEL_DIR, "traffic_agent.pth")

# First run: create the scenario folder and seed it with the network files
# from the "Original network" folder (files that are absent there are skipped).
scenario_missing = not os.path.exists(SCENARIO_DIR)
if scenario_missing:
    print(f"Creating scenario folder: {SCENARIO_DIR}")
    os.makedirs(SCENARIO_DIR, exist_ok=True)
    import shutil
    original_dir = os.path.join(BASE_DIR, "Original network")
    network_files = ("ff_heterogeneous.sumocfg", "ff.net.xml", "ff_heterogeneous.rou.xml")
    for file in network_files:
        src = os.path.join(original_dir, file)
        if not os.path.exists(src):
            continue
        shutil.copy(src, os.path.join(SCENARIO_DIR, file))
        print(f"Copied {file}")

results_missing = not os.path.exists(RESULTS_DIR)
if results_missing:
    print(f"Creating results folder: {RESULTS_DIR}")
    os.makedirs(RESULTS_DIR, exist_ok=True)

# Without the SUMO configuration nothing below can run, so stop here.
config_present = os.path.exists(CONFIG_PATH)
if not config_present:
    sys.exit(f"Config file not found: {CONFIG_PATH}")

# SUMO tools: TraCI ships inside $SUMO_HOME/tools, so that folder must be on
# sys.path before `import traci`.
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("Please declare environment variable 'SUMO_HOME'")

import traci

# SUMO binary resolution (headless mode for faster training).
# Prefer the Windows executable, then the plain binary, both under
# $SUMO_HOME/bin; SUMO_HOME is guaranteed to be set at this point.
sumoBinary = "sumo"
for _binary_name in ("sumo.exe", "sumo"):
    candidate = os.path.join(os.environ['SUMO_HOME'], 'bin', _binary_name)
    if os.path.exists(candidate):
        sumoBinary = candidate
        break

if os.path.exists(sumoBinary):
    print(f"Using SUMO binary: {sumoBinary}")
else:
    # Nothing under SUMO_HOME/bin: keep the bare command name so that
    # subprocess resolves it through PATH when SUMO is launched.
    print(f"SUMO binary not found under SUMO_HOME; relying on '{sumoBinary}' from PATH")

# Logs - save in results folder
sumo_log = os.path.join(RESULTS_DIR, "sumo_log.txt")
sumo_err = os.path.join(RESULTS_DIR, "sumo_err.txt")
traci_stdout = os.path.join(RESULTS_DIR, "traci_stdout.txt")

# Output files - save in results folder
EDGE_DATA_PATH = os.path.join(RESULTS_DIR, "edge_data.xml")


def get_free_port():
    """Return a TCP port number that the OS reports as free on localhost.

    The probing socket is bound to port 0 (OS picks the port) and closed
    again before returning, so the port is only free at the time of the call.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        probe.bind(("localhost", 0))
        address = probe.getsockname()
        return address[1]
    finally:
        probe.close()


# Training parameters
NUM_EPISODES = 1000  # Number of training episodes
EPISODE_DURATION = 3600  # seconds
YELLOW_DURATION = 3  # seconds
# NOTE(review): the training/evaluation loops compare MIN_GREEN against a
# counter that is incremented once per decision tick (every 10 simulated
# seconds), not against elapsed seconds - confirm the intended unit.
MIN_GREEN = 5  # seconds
# Step size handed to the Adam optimizer in train_rl_agent().
# NOTE(review): the original value was lost in a bad merge (a stray
# `output_file = ...` line sat here); 0.001 is Adam's default - TODO confirm.
LEARNING_RATE = 0.001
DISCOUNT_FACTOR = 0.99

# Traffic lights whose program is initialized at the start of each run
TLS_IDS = ["E1", "E2", "E3", "E4"]


def start_sumo(episode_num, mode="train"):
    """Launch a headless SUMO process that waits for a TraCI client.

    Trip info, emission and edge-data outputs are written to RESULTS_DIR with
    ``mode`` and ``episode_num`` embedded in the file names; the process'
    stdout/stderr are appended to the ``traci_stdout`` log file.

    Args:
        episode_num: Episode index used in the output file names.
        mode: Label used in the output file names (e.g. "train", "eval").

    Returns:
        tuple: ``(proc, log_handle, port_str)`` - the ``subprocess.Popen``
        object, the open log file (the caller must close it) and the TraCI
        port as a string.
    """
    port_str = str(get_free_port())
    output_file = os.path.join(RESULTS_DIR, f"tripinfo_{mode}_ep{episode_num}.xml")
    sumoCmd = [
        sumoBinary,
        "-c", CONFIG_PATH,
        "--quit-on-end",
        "--no-step-log",  # Disable step logging for faster execution
        "--tripinfo-output", output_file,
        "--emission-output", os.path.join(RESULTS_DIR, f"emissions_{mode}_ep{episode_num}.xml"),
        "--edgedata-output", os.path.join(RESULTS_DIR, f"edge_data_{mode}_ep{episode_num}.xml"),
        "--log", sumo_log,
        "--error-log", sumo_err,
        "--remote-port", port_str,
    ]

    log_handle = open(traci_stdout, "a", encoding="utf-8")
    try:
        proc = subprocess.Popen(
            sumoCmd,
            cwd=SCENARIO_DIR,
            stdout=log_handle,
            stderr=log_handle
        )
    except Exception:
        # The caller never receives the handle when the launch fails,
        # so it has to be closed here to avoid leaking it.
        log_handle.close()
        raise
    return proc, log_handle, port_str


def connect_traci(port_str, timeout_s=10):
    """Connect to a SUMO instance on localhost, retrying until a deadline.

    Args:
        port_str: TraCI port (anything ``int()`` accepts).
        timeout_s: Seconds to keep retrying before giving up.

    Returns:
        Whatever ``traci.connect`` returns on the first successful attempt.

    Raises:
        RuntimeError: No attempt succeeded before the deadline; the message
            carries the last exception seen (``None`` if none was made).
    """
    port_number = int(port_str)
    give_up_at = time.time() + timeout_s
    failure = None
    while True:
        if not (time.time() < give_up_at):
            raise RuntimeError(f"Could not connect. Last error: {failure}")
        try:
            # Single attempt per iteration; the retry pacing is done below
            return traci.connect(port=port_number, host="localhost", numRetries=0, waitBetweenRetries=0)
        except Exception as exc:
            failure = exc
        time.sleep(0.2)


def set_safe_phase(conn, tls_id, target_phase):
    """
    Switch a traffic light to ``target_phase`` via an intermediate phase.

    When the light is already in ``target_phase`` nothing happens. Otherwise
    phase ``current + 1`` is set, the simulation is advanced by
    YELLOW_DURATION steps, and then ``target_phase`` is set.
    NOTE(review): assumes phase ``current + 1`` is the yellow phase that
    follows the current green in the signal program - TODO confirm.

    Returns: number of simulation steps spent in the intermediate phase
    (0 when no switch was needed, YELLOW_DURATION otherwise)
    """
    lights = conn.trafficlight
    active_phase = lights.getPhase(tls_id)
    if active_phase != target_phase:
        # Intermediate (yellow) phase first
        lights.setPhase(tls_id, active_phase + 1)
        steps_done = 0
        while steps_done < YELLOW_DURATION:
            conn.simulationStep()
            steps_done += 1

        # Now the requested phase
        lights.setPhase(tls_id, target_phase)
        return YELLOW_DURATION
    return 0


def get_state(conn, tls_id="E1"):
    """
    Get current state: halting-vehicle count on the lanes controlled by
    ``tls_id``, normalized by 50.

    ``getControlledLanes`` reports one entry per controlled link, so a lane
    with several outgoing links appears more than once; the lanes are
    de-duplicated so that each queue is counted a single time.

    Args:
        conn: TraCI connection.
        tls_id: Traffic light whose controlled lanes are observed.

    Returns: torch.FloatTensor of shape (1,) on the selected device
    """
    # dict.fromkeys drops duplicates while keeping the reported lane order
    lanes = dict.fromkeys(conn.trafficlight.getControlledLanes(tls_id))
    queue_count = sum(conn.lane.getLastStepHaltingNumber(lane) for lane in lanes)

    # Simple state: normalize queue count and move to device
    state = torch.FloatTensor([queue_count / 50.0]).to(device)
    return state


def get_reward(conn, tls_id="E1"):
    """
    Calculate reward based on queue length.
    Reward = -queue_count (we want to minimize queues).
    Lower queue = higher reward.

    ``getControlledLanes`` reports one entry per controlled link, so a lane
    with several outgoing links appears more than once; the lanes are
    de-duplicated so that each queue is counted a single time.

    Args:
        conn: TraCI connection.
        tls_id: Traffic light whose controlled lanes are observed.

    Returns: float, the negated number of halting vehicles
    """
    # dict.fromkeys drops duplicates while keeping the reported lane order
    lanes = dict.fromkeys(conn.trafficlight.getControlledLanes(tls_id))
    queue_count = sum(conn.lane.getLastStepHaltingNumber(lane) for lane in lanes)

    # Reward is negative of queue count: fewer vehicles waiting = higher reward
    return -float(queue_count)


def train_rl_agent():
    """Train the policy network over NUM_EPISODES headless SUMO runs.

    Each episode starts a fresh SUMO process. Every 10 simulated seconds the
    agent observes the state and the reward, reinforces the previously
    applied action with that reward (one-step policy gradient,
    ``loss = -log pi(a|s) * reward``), then samples the next action from the
    policy. Only traffic light "E1" is controlled.

    Compared with a greedy ``argmax`` choice, sampling from the policy is what
    lets the agent explore, and crediting the reward observed *after* an
    action (instead of the one measured before it) is what makes the update
    depend on the action at all. Actions that the minimum-green gate did not
    let through are not reinforced.

    A failing episode is reported and skipped; training continues with the
    next one. A checkpoint is saved every 50 episodes and the final weights
    are saved to MODEL_PATH.

    Returns:
        tuple: ``(brain, training_log)`` - the trained ``TrafficBrain`` and a
        list with one dict per completed episode (keys ``episode``,
        ``avg_reward``, ``total_reward``, ``steps``).
    """
    print("\n" + "="*70)
    print("RL AGENT TRAINING STARTED (HEADLESS MODE)")
    print("="*70)
    print(f"Training {NUM_EPISODES} episodes without GUI visualization")
    print("This will be significantly faster than GUI mode.")
    print(f"Using device: {device}")

    # Initialize brain and optimizer
    brain = TrafficBrain(input_size=1, hidden_size=16, output_size=2, device=device).to(device)
    optimizer = optim.Adam(brain.parameters(), lr=LEARNING_RATE)

    training_log = []

    for episode in range(NUM_EPISODES):
        print(f"\n--- Episode {episode + 1}/{NUM_EPISODES} ---")

        proc = None
        log_handle = None
        conn = None

        try:
            proc, log_handle, port_str = start_sumo(episode, mode="train")
            time.sleep(1)  # Reduced wait time for headless mode
            conn = connect_traci(port_str, timeout_s=10)

            # Initialize traffic light
            for tls in TLS_IDS:
                conn.trafficlight.setProgram(tls, "0")

            episode_reward = 0
            episode_steps = 0
            phase_times = {tls: 0 for tls in TLS_IDS}
            # (state, action) of the last applied decision that has not been
            # reinforced yet; its reward only becomes visible at the next tick.
            pending = None

            brain.train()

            while conn.simulation.getTime() <= EPISODE_DURATION:
                conn.simulationStep()
                sim_time = conn.simulation.getTime()

                # Decision every 10 steps (not every step to reduce overhead)
                if int(sim_time) % 10 == 0:
                    state = get_state(conn)
                    reward = get_reward(conn)
                    episode_reward += reward

                    # Training step: policy gradient on the previous applied
                    # action, using the reward observed after it took effect.
                    if pending is not None:
                        prev_state, prev_action = pending
                        action_prob = brain(prev_state)[prev_action]
                        loss = -torch.log(action_prob + 1e-8) * reward

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                        pending = None

                    # Sample the action (0=N-S, 1=E-W) from the policy so that
                    # both actions keep being explored during training.
                    with torch.no_grad():
                        probs = brain(state)
                    action = torch.distributions.Categorical(probs=probs).sample().item()
                    target_phase = 0 if action == 0 else 2

                    # Check if enough time in current phase.
                    # NOTE(review): phase_times counts decision ticks (10 s
                    # each), while MIN_GREEN is documented in seconds.
                    if phase_times["E1"] >= MIN_GREEN:
                        # Apply action
                        set_safe_phase(conn, "E1", target_phase)
                        phase_times["E1"] = 0
                        pending = (state, action)
                    else:
                        phase_times["E1"] += 1

                episode_steps += 1

            # NOTE(review): rewards are accumulated per decision tick but
            # averaged over all simulation steps.
            avg_episode_reward = episode_reward / episode_steps if episode_steps > 0 else 0
            training_log.append({
                'episode': episode + 1,
                'avg_reward': avg_episode_reward,
                'total_reward': episode_reward,
                'steps': episode_steps
            })

            print(f"Episode {episode + 1} completed: avg_reward = {avg_episode_reward:.4f}")

            # Save model periodically
            if (episode + 1) % 50 == 0:
                checkpoint_path = os.path.join(MODEL_DIR, f"traffic_agent_ep{episode + 1}.pth")
                brain.save(checkpoint_path)

            # Regular shutdown; clearing `conn` keeps the cleanup below from
            # closing the same connection a second time.
            conn.close()
            conn = None
            proc.wait(timeout=5)

        except Exception as e:
            print(f"Episode {episode + 1} failed: {e}")

        finally:
            if conn is not None:
                try:
                    conn.close()
                except Exception:
                    # Best effort: the connection may already be broken
                    pass
            if proc is not None and proc.poll() is None:
                proc.terminate()
                try:
                    # Reap the process so it does not linger as a zombie
                    proc.wait(timeout=5)
                except subprocess.TimeoutExpired:
                    proc.kill()
            if log_handle:
                log_handle.close()

    # Save trained model
    brain.save(MODEL_PATH)

    print("\n" + "="*70)
    print("TRAINING COMPLETED")
    print("="*70)

    return brain, training_log


# ==========================================
# 3. EXECUTION
# ==========================================

# Run the training and print the per-episode average reward; any error that
# escapes train_rl_agent() is reported instead of aborting the notebook.
try:
    brain, training_log = train_rl_agent()

    print(f"\nTraining log summary:")
    for log_entry in training_log:
        print(f"  Episode {log_entry['episode']}: avg_reward = {log_entry['avg_reward']:.4f}")
except Exception as e:
    print(f"Training failed: {e}")

Using SUMO binary: C:\Program Files (x86)\Eclipse\Sumo\bin\sumo-gui.exe

RL AGENT TRAINING STARTED

--- Episode 1/5 ---
Episode 1 completed: avg_reward = -14.3365

--- Episode 2/5 ---
Episode 2 completed: avg_reward = -15.1301

--- Episode 3/5 ---
Episode 3 completed: avg_reward = -13.7541

--- Episode 4/5 ---
Episode 4 completed: avg_reward = -14.7695

--- Episode 5/5 ---
Episode 5 completed: avg_reward = -13.5166
Model saved to c:\Users\antoi\OneDrive\Documents\Documents\Devoirs\Études sup\ENTPE 3A\Majeure transports\Mobility Control and Management\Project\Livrable 2\MOCOM_project_2\Network with RL control\models\traffic_agent.pth

TRAINING COMPLETED

Training log summary:
  Episode 1: avg_reward = -14.3365
  Episode 2: avg_reward = -15.1301
  Episode 3: avg_reward = -13.7541
  Episode 4: avg_reward = -14.7695
  Episode 5: avg_reward = -13.5166


## Evaluation Phase

Test the trained agent on a fresh scenario and compare against baseline.

In [None]:
# ==========================================
# EVALUATION: Run trained agent
# ==========================================

def evaluate_rl_agent():
    """Run one SUMO simulation controlled greedily by the trained agent.

    The weights are loaded from MODEL_PATH; when that file is missing an
    error is printed and the function returns without running anything. The
    run uses episode number 999 and mode "eval" for its output file names.
    Every 10 simulated seconds the most probable action is chosen and, once
    the minimum-green counter allows it, applied to traffic light "E1".

    Errors during the simulation are printed, not raised.

    Returns:
        None
    """
    print("\n" + "="*70)
    print("RL AGENT EVALUATION")
    print("="*70)
    print(f"Using device: {device}")

    # Load trained model
    brain = TrafficBrain(input_size=1, hidden_size=16, output_size=2, device=device).to(device)
    if not brain.load(MODEL_PATH, device=device):
        print("ERROR: Trained model not found! Run training cell first.")
        return

    brain.eval()

    proc = None
    log_handle = None
    conn = None

    try:
        proc, log_handle, port_str = start_sumo(episode_num=999, mode="eval")
        time.sleep(2)
        conn = connect_traci(port_str, timeout_s=10)

        # Initialize traffic light
        for tls in TLS_IDS:
            conn.trafficlight.setProgram(tls, "0")

        phase_times = {tls: 0 for tls in TLS_IDS}

        print("\nRunning evaluation simulation...")

        while conn.simulation.getTime() <= EPISODE_DURATION:
            conn.simulationStep()
            sim_time = conn.simulation.getTime()

            # Decision every 10 steps
            if int(sim_time) % 10 == 0:
                state = get_state(conn)

                # Forward pass (no gradient needed); greedy choice at evaluation time
                with torch.no_grad():
                    probs = brain(state)
                    action = torch.argmax(probs).item()

                target_phase = 0 if action == 0 else 2

                # NOTE(review): phase_times counts decision ticks (10 s each),
                # while MIN_GREEN is documented in seconds.
                if phase_times["E1"] >= MIN_GREEN:
                    set_safe_phase(conn, "E1", target_phase)
                    phase_times["E1"] = 0
                else:
                    phase_times["E1"] += 1

        # Regular shutdown; clearing `conn` keeps the cleanup below from
        # closing the same connection a second time.
        conn.close()
        conn = None
        proc.wait(timeout=5)

        print("Evaluation completed successfully!")

    except Exception as e:
        print(f"Evaluation failed: {e}")

    finally:
        if conn is not None:
            try:
                conn.close()
            except Exception:
                # Best effort: the connection may already be broken
                pass
        if proc is not None and proc.poll() is None:
            proc.terminate()
            try:
                # Reap the process so it does not linger as a zombie
                proc.wait(timeout=5)
            except subprocess.TimeoutExpired:
                proc.kill()
        if log_handle:
            log_handle.close()


# Run the evaluation; evaluate_rl_agent() already reports simulation errors
# itself, this guard only catches what is raised outside its own try block
# (e.g. while building or loading the model).
try:
    evaluate_rl_agent()
except Exception as e:
    print(f"Evaluation error: {e}")


RL AGENT EVALUATION
Model loaded from c:\Users\antoi\OneDrive\Documents\Documents\Devoirs\Études sup\ENTPE 3A\Majeure transports\Mobility Control and Management\Project\Livrable 2\MOCOM_project_2\Network with RL control\models\traffic_agent.pth

Running evaluation simulation...
Evaluation completed successfully!


## KPI summary for the RL agent scenario

This section reads the evaluation outputs and reports key indicators plus visualizations.

In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
import matplotlib.pyplot as plt

# ==========================================
# 1. HELPERS
# ==========================================
# Paths are rebuilt from the current working directory so that this cell can
# run without the training cell having been executed in the same session.
BASE_DIR = os.getcwd()
SCENARIO_DIR = os.path.join(BASE_DIR, "Network with RL control")
RESULTS_DIR = os.path.join(SCENARIO_DIR, "results")
# Edge-data file of the evaluation run (mode "eval", episode 999)
EDGE_DATA_PATH = os.path.join(RESULTS_DIR, "edge_data_eval_ep999.xml")


def parse_tripinfo(results_folder, filename_pattern="tripinfo_eval_ep999.xml"):
    """Read a SUMO tripinfo XML file into a DataFrame.

    Args:
        results_folder: Folder that contains the file.
        filename_pattern: File name inside ``results_folder``.

    Returns:
        DataFrame with one row per ``<tripinfo>`` element directly under the
        root and the columns ``id``, ``duration``, ``waitingTime`` and
        ``timeLoss``; an empty DataFrame (after printing a warning) when the
        file does not exist.
    """
    path = os.path.join(results_folder, filename_pattern)
    if os.path.exists(path):
        records = [
            {
                'id': node.get('id'),
                'duration': float(node.get('duration')),
                'waitingTime': float(node.get('waitingTime')),
                'timeLoss': float(node.get('timeLoss')),
            }
            for node in ET.parse(path).getroot().findall('tripinfo')
        ]
        return pd.DataFrame(records)

    print(f"Warning: {path} not found")
    return pd.DataFrame()

def parse_emissions(results_folder, filename_pattern="emissions_eval_ep999.xml"):
    """Sum CO2, NOx and fuel over all vehicles and timesteps of an emission file.

    Args:
        results_folder: Folder that contains the file.
        filename_pattern: File name inside ``results_folder``.

    Returns:
        dict with the keys ``CO2``, ``NOx`` and ``fuel`` holding the summed
        attribute values (a missing attribute counts as 0), or None when the
        file does not exist.
    """
    path = os.path.join(results_folder, filename_pattern)
    if not os.path.exists(path):
        return None

    tree = ET.parse(path)
    root = tree.getroot()
    total = {'CO2': 0, 'NOx': 0, 'fuel': 0}
    for timestep in root.findall('timestep'):
        for veh in timestep.findall('vehicle'):
            for key in total:
                total[key] += float(veh.get(key, 0))
    return total


def parse_edge_data(path):
    """Read per-edge statistics from the last interval of an edge-data file.

    Edges without traffic in an interval can be written without measurement
    attributes; such missing values are reported as 0.0 instead of raising.

    Args:
        path: Path of the edge-data XML file.

    Returns:
        dict mapping edge id to ``{'speed', 'waiting', 'density'}`` (floats
        taken from the ``speed``, ``waitingTime`` and ``density``
        attributes); an empty dict when the file is missing or holds no
        ``<interval>`` element.
    """
    if not os.path.exists(path):
        return {}

    tree = ET.parse(path)
    root = tree.getroot()
    intervals = root.findall('interval')
    if not intervals:
        return {}

    last_interval = intervals[-1]
    edge_stats = {}
    for edge in last_interval.findall('edge'):
        e_id = edge.get('id')
        edge_stats[e_id] = {
            'speed': float(edge.get('speed', 0)),
            'waiting': float(edge.get('waitingTime', 0)),
            'density': float(edge.get('density', 0))
        }
    return edge_stats


# ==========================================
# 2. KPI SUMMARY
# ==========================================
# All evaluation outputs are read from, and the plot is written to, RESULTS_DIR.
print(f"Reading outputs from: {RESULTS_DIR}")
df = parse_tripinfo(RESULTS_DIR)
emissions = parse_emissions(RESULTS_DIR)
edge_stats = parse_edge_data(EDGE_DATA_PATH)

if df.empty:
    print("No trip data available. Make sure evaluation simulation completed.")
else:
    print("\n" + "=" * 70)
    print("KPI SUMMARY (RL AGENT)")
    print("=" * 70)
    print(f"Avg. Travel Time: {df['duration'].mean():.2f} s")
    print(f"Avg. Waiting Time: {df['waitingTime'].mean():.2f} s")
    print(f"Total Throughput: {len(df)} vehicles")

    if emissions:
        # SUMO's emission output reports mg per vehicle and step, so the
        # totals are in mg (assuming 1 s simulation steps - TODO confirm):
        # /1e6 gives kg for CO2 and /1e3 gives g for NOx.
        print("-" * 70)
        print(f"Total CO2 Emissions: {emissions['CO2'] / 1000000:.2f} kg")
        print(f"Total NOx: {emissions['NOx'] / 1000:.2f} g")
        print(f"Total Fuel: {emissions['fuel']:.2f} mg")

    if edge_stats:
        print("\n" + "=" * 70)
        print("EDGE SUMMARY (E0->E5 and E5->E0)")
        print("=" * 70)

        edges_forward = ["E0E1", "E1E2", "E2E3", "E3E4", "E4E5"]
        edges_backward = ["E5E4", "E4E3", "E3E2", "E2E1", "E1E0"]

        for direction, edges in [("E0->E5", edges_forward), ("E5->E0", edges_backward)]:
            print(f"\n{direction}")
            for e in edges:
                # Edges absent from the file are shown with zeros
                stats = edge_stats.get(e, {'speed': 0, 'waiting': 0, 'density': 0})
                speed_kmh = stats['speed'] * 3.6  # Convert m/s to km/h
                print(f"{e:<6} | speed: {speed_kmh:.2f} km/h | waiting: {stats['waiting']:.2f} s | density: {stats['density']:.2f} veh/km")

    # ==========================================
    # 3. VISUALIZATION
    # ==========================================
    plt.figure(figsize=(7, 5))
    plt.hist(df['waitingTime'], bins=30, alpha=0.7, color='green')
    plt.title('Waiting Time Distribution (RL Agent)')
    plt.xlabel('Seconds')
    plt.ylabel('Number of Vehicles')
    plt.grid(True, linestyle='--', alpha=0.5)
    plt.tight_layout()
    plot_path = os.path.join(RESULTS_DIR, "rl_waiting_time.png")
    plt.savefig(plot_path, dpi=300)
    plt.show()
    print(f"\nSaved plot to: {plot_path}")