In [1]:
%%writefile config.py
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Tunable parameters for the experiment, kept apart from object construction.
# NOTE(review): the orchestration cell in this notebook imports only
# train_episodes, test_episodes, sumoConfig, sumoBinary and gamma from here.

# Learning rate. NOTE(review): the orchestration cell passes alpha=0.01 as a
# literal when it builds the agent, so this value looks unused -- confirm which
# one is intended.
alpha = 0.1
# Discount factor applied when the agent accumulates per-step rewards into returns.
gamma = 0.9
# Presumably epsilon-greedy exploration settings left over from another agent;
# nothing visible in this notebook reads them -- TODO confirm before removing.
epsilon = 1.0
epsilon_decay = 0.995
epsilon_min = 0.01
# Number of training and evaluation episodes per run.
train_episodes = 400
test_episodes = 200
# Executable name handed to traci.start ("sumo" here; no path is given, so it
# must be resolvable from the environment).
sumoBinary = "sumo"
# Absolute, machine-specific path to the SUMO scenario configuration.
sumoConfig = r"C:\Users\Administrator\Desktop\MySumo\stage 9(DRM with low congestion)\SUMO files\stage9.sumocfg"


Writing config.py


In [2]:
%%writefile environment.py
import traci

class Environment:
    """SUMO/TraCI wrapper for the signalised junction ``"J1"``.

    The class starts and stops the simulation, reads per-lane waiting times and
    queue lengths, installs a four-phase signal program chosen from a fixed
    action table, and maps queue lengths to a single integer state index.
    """

    def __init__(self, config, sumo_binary):
        """Store the launch settings and build the action table.

        Args:
            config: path of the ``.sumocfg`` file passed to SUMO via ``-c``.
            sumo_binary: executable name or path used to launch SUMO.
        """
        self.sumo_binary = sumo_binary
        self.sumo_config = config
        self.action_space = self.define_action_space()

    def define_action_space(self):
        """Return the action table: action index -> four green durations.

        Every entry is a 4-tuple of multiples of 8 that sums to 64, so all
        actions share the same total green time.
        """
        return {
            0: (24, 8, 8, 24),   1: (24, 8, 24, 8),    2: (32, 8, 16, 8),    3: (40, 8, 8, 8),     4: (24, 8, 16, 16),
            5: (24, 24, 8, 8),   6: (8, 32, 16, 8),    7: (16, 32, 8, 8),    8: (32, 8, 8, 16),    9: (8, 32, 8, 16),
            10: (8, 24, 24, 8),  11: (32, 16, 8, 8),   12: (8, 16, 24, 16),  13: (24, 16, 8, 16),  14: (8, 24, 16, 16),
            15: (8, 16, 8, 32),  16: (8, 16, 16, 24),  17: (8, 16, 32, 8),   18: (24, 16, 16, 8),  19: (8, 24, 8, 24),
            20: (16, 8, 24, 16), 21: (8, 40, 8, 8),    22: (8, 8, 40, 8),    23: (16, 8, 16, 24),  24: (8, 8, 32, 16),
            25: (8, 8, 16, 32),  26: (16, 8, 8, 32),   27: (8, 8, 8, 40),    28: (8, 8, 24, 24),   29: (16, 8, 32, 8),
            30: (16, 24, 16, 8), 31: (16, 16, 24, 8),  32: (16, 16, 16, 16), 33: (16, 24, 8, 16),  34: (16, 16, 8, 24)
        }

    def start_simulation(self):
        """Launch SUMO with the stored configuration and connect TraCI to it."""
        traci.start([self.sumo_binary, "-c", self.sumo_config])

    def close_simulation(self):
        """Close the current TraCI connection."""
        traci.close()

    def compute_waiting_time(self):
        """Return ``{lane_id: waiting time}`` for the lanes controlled by "J1".

        The result is keyed by lane id, so a lane that TraCI lists more than
        once contributes a single entry.
        """
        waiting_time = {}
        controlled_lanes = traci.trafficlight.getControlledLanes("J1")
        for lane in controlled_lanes:
            waiting_time[lane] = traci.lane.getWaitingTime(lane)
        return waiting_time

    def compute_queue_length(self):
        """Return ``{lane_id: halting vehicle count}`` for the lanes controlled by "J1".

        The queue length is the number of vehicles TraCI reported as halting
        on the lane in the last simulation step (i.e. effectively stopped).
        """
        queue_length = {}
        controlled_lanes = traci.trafficlight.getControlledLanes("J1")
        for lane in controlled_lanes:
            queue_length[lane] = traci.lane.getLastStepHaltingNumber(lane)
        return queue_length

    def update_traffic_light_program(self, junction_id, green_durations):
        """Install a four-phase program with the given green durations.

        Each green phase is followed by a 4-unit yellow phase, giving eight
        phases in total, registered under program id "1".

        Args:
            junction_id: id of the traffic light to reprogram.
            green_durations: exactly four positive integers, one per green phase.

        Raises:
            ValueError: if the number of durations is not 4, or any duration is
                not a positive ``int``.
        """
        if len(green_durations) != 4:
            raise ValueError("Must provide exactly 4 duration values")
        if not all(isinstance(x, int) and x > 0 for x in green_durations):
            raise ValueError("All durations must be positive integers")
        yellow_duration = 4
        phases = []
        # Twelve signal characters per state; each consecutive group of three
        # is switched to green (then yellow) in turn while the rest stay red.
        states = [
            "GGgrrrrrrrrr", "yyyrrrrrrrrr",
            "rrrGGgrrrrrr", "rrryyyrrrrrr",
            "rrrrrrGGgrrr", "rrrrrryyyrrr",
            "rrrrrrrrrGGg", "rrrrrrrrryyy"
        ]
        for i, green_duration in enumerate(green_durations):
            # minDur == maxDur == duration, so no range is left for the phase length.
            phases.append(traci.trafficlight.Phase(
                duration=green_duration,
                state=states[i*2],
                minDur=green_duration,
                maxDur=green_duration
            ))
            phases.append(traci.trafficlight.Phase(
                duration=yellow_duration,
                state=states[i*2 + 1],
                minDur=yellow_duration,
                maxDur=yellow_duration
            ))
        logic = traci.trafficlight.Logic(
            programID="1",
            type=0,
            currentPhaseIndex=0,
            phases=phases
        )
        traci.trafficlight.setProgramLogic(junction_id, logic)

    def run_program_0(self):
        """Run a 3600-step baseline without installing any program from this class.

        The waiting time of all controlled lanes is summed at every step, and
        every vehicle id seen during the run is counted once.

        Returns:
            ``(total_waiting_time, total_vehicles, waiting_time_per_vehicle)``;
            the per-vehicle value is 0 when no vehicle was seen.
        """
        self.start_simulation()
        total_waiting_time = 0.0
        vehicle_counts = set()
        try:
            for _ in range(3600):
                traci.simulationStep()
                total_waiting_time += sum(self.compute_waiting_time().values())
                vehicle_counts.update(traci.vehicle.getIDList())
        finally:
            # Always release the TraCI connection: a leaked connection makes
            # the next start_simulation() fail.
            self.close_simulation()
        total_vehicles = len(vehicle_counts)
        waiting_time_per_vehicle = total_waiting_time / total_vehicles if total_vehicles > 0 else 0
        print("\nDefault Program Results:")
        print(f"Total Wait Time: {total_waiting_time:.1f}")
        print(f"Vehicles: {total_vehicles}")
        print(f"Wait Time per Vehicle: {waiting_time_per_vehicle:.1f}")
        return total_waiting_time, total_vehicles, waiting_time_per_vehicle

    def set_traffic_light_program(self, action):
        """Apply the timings of ``action`` to "J1" and advance 80 simulation steps.

        80 equals one full cycle of the installed program (64 green + 4 x 4
        yellow) -- assuming one simulation step per second, TODO confirm
        against the scenario's step length.

        Returns:
            The number of simulation steps performed (always 80).
        """
        timings = self.action_space[action]
        self.update_traffic_light_program("J1", timings)
        for _ in range(80):
            traci.simulationStep()
        return 80

    def discretized_state(self, queue_lengths, low_threshold=27, high_threshold=64):
        """Encode queue lengths as one integer, using one base-3 digit per lane.

        A queue ``<= low_threshold`` maps to digit 0, ``<= high_threshold`` to
        digit 1, and anything longer to digit 2. The i-th value supplies the
        digit with weight ``3 ** i``, so ``k`` lanes yield an index in
        ``[0, 3 ** k)``.

        Args:
            queue_lengths: iterable of per-lane queue lengths.
            low_threshold: upper bound (inclusive) of the lowest level.
            high_threshold: upper bound (inclusive) of the middle level.
        """
        val = 0
        for position, q in enumerate(queue_lengths):
            if q <= low_threshold:
                level = 0
            elif q <= high_threshold:
                level = 1
            else:
                level = 2
            val += level * 3 ** position
        return val

    def get_num_actions(self):
        """Return the number of entries in the action table."""
        return len(self.action_space)


Writing environment.py


In [3]:
%%writefile agent.py
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical

class AttentionPolicyNetwork(nn.Module):
    """Policy network: hidden layer, feature-wise attention, hidden layer, softmax.

    Args:
        input_dim: size of the state vector.
        num_actions: number of discrete actions (size of the output distribution).
    """

    def __init__(self, input_dim, num_actions):
        super(AttentionPolicyNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        # One attention score per hidden feature. A single scalar score per
        # sample cannot be normalised into meaningful weights.
        self.attention_layer = nn.Linear(128, 128)
        self.fc2 = nn.Linear(128, 64)
        self.output = nn.Linear(64, num_actions)

    def forward(self, state):
        """Return action probabilities for ``state``.

        Args:
            state: float tensor whose last dimension has size ``input_dim``.

        Returns:
            Tensor with the same leading dimensions and a last dimension of
            size ``num_actions`` that sums to 1.
        """
        x = torch.relu(self.fc1(state))
        # Normalise over the feature dimension. Normalising over dim=0 (the
        # batch) made the weight identically 1 for a batch of one, so the
        # attention layer never received gradient, and it mixed unrelated
        # samples for larger batches.
        attention_weights = torch.softmax(self.attention_layer(x), dim=-1)
        x = x * attention_weights
        x = torch.relu(self.fc2(x))
        action_probs = torch.softmax(self.output(x), dim=-1)
        return action_probs

class AttentionPolicyAgent:
    """Monte-Carlo policy-gradient agent built on ``AttentionPolicyNetwork``.

    Args:
        num_states: length of the state vector fed to the network.
        num_actions: number of discrete actions.
        alpha: learning rate handed to the Adam optimizer.
        gamma: discount factor used when accumulating returns.
    """

    def __init__(self, num_states, num_actions, alpha, gamma):
        self.gamma = gamma
        self.num_states = num_states
        self.num_actions = num_actions
        self.policy_network = AttentionPolicyNetwork(num_states, num_actions)
        self.optimizer = optim.Adam(self.policy_network.parameters(), lr=alpha)

    def choose_action(self, state):
        """Sample an action from the current policy.

        Args:
            state: sequence of ``num_states`` numbers (the caller in this
                notebook passes a one-hot list).

        Returns:
            ``(action, prob)``: the sampled action index as an ``int`` and the
            probability the policy assigned to it, as a 0-dim tensor that keeps
            its autograd history unless gradients are disabled by the caller.
        """
        state_tensor = torch.FloatTensor(state).unsqueeze(0)
        action_probs = self.policy_network(state_tensor).squeeze(0)
        action_dist = Categorical(action_probs)
        action = action_dist.sample()
        return action.item(), action_probs[action]

    def compute_returns(self, rewards):
        """Return the discounted return ``G_t = r_t + gamma * G_{t+1}`` per step.

        Args:
            rewards: sequence of per-step rewards in chronological order.

        Returns:
            1-D float tensor of the same length as ``rewards`` (empty for an
            empty sequence).
        """
        returns = []
        G = 0.0
        # Accumulate back to front, then reverse once; inserting at index 0 on
        # every step is quadratic in the episode length.
        for reward in reversed(rewards):
            G = reward + self.gamma * G
            returns.append(G)
        returns.reverse()
        return torch.FloatTensor(returns)

    def update_policy(self, trajectory):
        """Apply one gradient step from a finished episode.

        The loss is ``-sum(log pi(a_t | s_t) * G_t) - 0.01 * H``, where ``H``
        is the policy entropy averaged over the visited states.

        Args:
            trajectory: sequence of ``(state, action, reward, action_prob)``
                tuples, with ``action_prob`` as returned by ``choose_action``.
                An empty trajectory is a no-op.
        """
        if not trajectory:
            return
        states, _actions, rewards, action_probs = zip(*trajectory)
        returns = self.compute_returns(rewards)
        chosen_probs = torch.stack(action_probs)
        log_probs = torch.log(chosen_probs)

        # The entropy bonus needs the full action distribution at each visited
        # state; the stored probabilities cover only the sampled action. States
        # are evaluated one at a time, exactly as choose_action does, so the
        # result does not depend on how the network treats the batch dimension.
        states_tensor = torch.FloatTensor(states)
        step_dists = torch.cat(
            [self.policy_network(s.unsqueeze(0)) for s in states_tensor], dim=0
        )
        # clamp_min keeps 0 * log(0) from turning into NaN.
        entropy = -(step_dists * torch.log(step_dists.clamp_min(1e-8))).sum(dim=-1).mean()

        loss = -torch.sum(log_probs * returns) - 0.01 * entropy
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()


Writing agent.py


In [4]:
%%writefile simulation.py
import traci
import torch
import numpy as np

def run_simulation_reinforce(environment, agent, episodes):
    """Train ``agent`` for ``episodes`` episodes of 3600 simulation steps each.

    In each episode the agent repeatedly picks a signal timing, the environment
    applies it and advances the simulation, and the change in total waiting
    time becomes the reward. The policy is updated once, at the end of the
    episode, from the whole trajectory. The optimizer's learning rate decays
    as ``lr0 / (1 + 0.01 * episode)``.

    Args:
        environment: object providing ``start_simulation``, ``close_simulation``,
            ``compute_queue_length``, ``compute_waiting_time``,
            ``discretized_state`` and ``set_traffic_light_program``.
        agent: object providing ``optimizer``, ``num_states``,
            ``choose_action`` and ``update_policy``.
        episodes: number of training episodes.

    Returns:
        ``(total_waiting_times, waiting_times_per_vehicle)``: two lists with
        one entry per episode.
    """
    total_waiting_times_rl = []
    waiting_times_per_vehicle_rl = []
    initial_alpha = agent.optimizer.param_groups[0]['lr']
    decay_factor = 0.01

    print("\nEpisode Results")
    print("Format: Episode | Total Wait Time | Vehicles | Wait Time per Vehicle")
    print("-" * 75)

    for episode in range(episodes):
        agent.optimizer.param_groups[0]['lr'] = initial_alpha / (1 + decay_factor * episode)
        environment.start_simulation()
        total_waiting_time = 0.0
        vehicle_counts = set()
        try:
            current_state = environment.discretized_state(environment.compute_queue_length().values())
            trajectory = []
            step = 0
            previous_waiting_time = sum(environment.compute_waiting_time().values())

            while step < 3600:
                state_one_hot = [0] * agent.num_states
                state_one_hot[current_state] = 1
                action, action_prob = agent.choose_action(state_one_hot)
                needed_steps = environment.set_traffic_light_program(action)
                # NOTE(review): waiting time is sampled once per action here,
                # while Environment.run_program_0 sums it on every simulation
                # step, so the two totals are not directly comparable.
                current_waiting_time = sum(environment.compute_waiting_time().values())
                waiting_time_difference = current_waiting_time - previous_waiting_time
                total_waiting_time += current_waiting_time
                vehicle_counts.update(traci.vehicle.getIDList())
                next_state = environment.discretized_state(environment.compute_queue_length().values())
                # Reward is the drop in waiting time; improvements get a 1.5x bonus.
                reward = -waiting_time_difference
                if waiting_time_difference < 0:
                    reward *= 1.5
                trajectory.append((state_one_hot, action, reward, action_prob))
                current_state = next_state
                previous_waiting_time = current_waiting_time
                step += needed_steps
            agent.update_policy(trajectory)
        finally:
            # Close the simulation even when the episode is interrupted or
            # fails, otherwise the connection stays open and the next
            # start_simulation() cannot succeed.
            environment.close_simulation()
        total_vehicles = len(vehicle_counts)
        waiting_time_per_vehicle = total_waiting_time / total_vehicles if total_vehicles > 0 else 0
        total_waiting_times_rl.append(total_waiting_time)
        waiting_times_per_vehicle_rl.append(waiting_time_per_vehicle)
        print(f"#{episode+1:3d} | {total_waiting_time:12.1f} | {total_vehicles:3d} | {waiting_time_per_vehicle:8.1f}")
    return total_waiting_times_rl, waiting_times_per_vehicle_rl


Writing simulation.py


In [5]:
%%writefile testing.py
import traci
import torch
import numpy as np

def test_fixed_policy(environment, agent, test_episodes):
    average_waiting_times = []
    print("\nTesting Fixed Policy:")
    print("Format: Episode | Total Wait Time | Vehicles | Wait Time per Vehicle")
    print("-" * 40)
    for episode in range(test_episodes):
        environment.start_simulation()
        total_waiting_time = 0.0
        vehicle_counts = set()
        current_state = environment.discretized_state(environment.compute_queue_length().values())
        step = 0
        while step < 3600:
            state_one_hot = [0] * agent.num_states
            state_one_hot[current_state] = 1
            with torch.no_grad():
                action, _ = agent.choose_action(state_one_hot)
            needed_steps = environment.set_traffic_light_program(action)
            waiting_time_for_action = sum(environment.compute_waiting_time().values())
            total_waiting_time += waiting_time_for_action
            current_vehicles = set(traci.vehicle.getIDList())
            vehicle_counts.update(current_vehicles)
            next_state = environment.discretized_state(environment.compute_queue_length().values())
            current_state = next_state
            step += needed_steps
        total_vehicles = len(vehicle_counts)
        avg_wait_time = total_waiting_time / total_vehicles if total_vehicles > 0 else 0
        average_waiting_times.append(avg_wait_time)
        environment.close_simulation()
        print(f"#{episode+1:3d} | {total_waiting_time:12.1f} | {total_vehicles:3d} | {avg_wait_time:8.1f}")
    overall_avg_wait_time = np.mean(average_waiting_times)
    print(f"\nOverall Average Wait Time: {overall_avg_wait_time:.2f} seconds")
    return average_waiting_times


Writing testing.py


In [6]:
%%writefile plotting.py
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

def plot_with_sem(data, saveinfo, x=None, title="Mean with Standard Error", xlabel="Episodes", ylabel="Value",
                  line_color='#ff007f', sem_color='cyan', alpha=0.3, ddof=0):
    """Plot the mean across runs with a shaded standard-error band, and save it.

    Args:
        data: array-like of shape ``(n_samples, n_points)``, one row per run.
            A 1-D sequence is treated as a single run.
        saveinfo: path the figure is written to.
        x: x coordinates; defaults to ``0 .. n_points - 1``.
        title, xlabel, ylabel: figure labels.
        line_color: colour of the mean line.
        sem_color: colour of the shaded band.
        alpha: opacity of the shaded band.
        ddof: delta degrees of freedom for the standard deviation. The default
            0 keeps the population estimate this function has always used;
            pass 1 for the sample-based standard error (needs at least two runs).
    """
    # atleast_2d lets a single run through instead of failing on shape unpacking.
    data = np.atleast_2d(np.asarray(data, dtype=float))
    n_samples, n_points = data.shape
    if x is None:
        x = np.arange(n_points)
    mean_values = np.mean(data, axis=0)
    std_dev = np.std(data, axis=0, ddof=ddof)
    sem = std_dev / np.sqrt(n_samples)
    print("Standard Error:", sem)
    plt.figure(figsize=(10, 6))
    plt.plot(x, mean_values, color=line_color, linewidth=1.5, label='Mean')
    plt.fill_between(x, mean_values - sem, mean_values + sem, color=sem_color, alpha=alpha, label='Standard Error Range')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(title)
    plt.legend()
    plt.savefig(saveinfo, dpi=300, bbox_inches='tight')
    plt.show()

def plot_gaussian(mean_values, saveinfo, col, bins=30):
    """Plot a histogram of ``mean_values`` with a fitted Gaussian, and save it.

    The Gaussian uses the mean and standard deviation of the data and is
    scaled by ``N * bin_width`` so it is drawn in the same units (counts) as
    the histogram. The individual values are also marked along the x axis.

    Args:
        mean_values: 1-D sequence of values.
        saveinfo: path the figure is written to.
        col: face colour of the histogram bars.
        bins: number of histogram bins.
    """
    # Start on a fresh figure. Without this the histogram is drawn onto
    # whatever figure is current, so consecutive calls can overlay each other
    # on backends where plt.show() does not discard the figure.
    plt.figure()
    counts, bin_edges, _ = plt.hist(
        mean_values,
        bins=bins,
        alpha=0.6,
        color=col,
        edgecolor='black',
        label="Mean Values"
    )
    bin_width = bin_edges[1] - bin_edges[0]
    N = len(mean_values)
    mu = np.mean(mean_values)
    sigma = np.std(mean_values)
    # Evaluate the fit over +/- 3 standard deviations around the mean.
    x = np.linspace(mu - 3*sigma, mu + 3*sigma, 100)
    pdf = norm.pdf(x, mu, sigma)
    # A density integrates to 1; multiplying by N * bin_width converts it to
    # expected counts per bin.
    scaled_pdf = pdf * N * bin_width
    plt.plot(
        x,
        scaled_pdf,
        label=f'Gaussian Fit\nμ={mu:.2f}, σ={sigma:.2f}',
        color='red',
        linewidth=2
    )
    plt.scatter(mean_values, np.zeros_like(mean_values), color='black', marker='o', label="Individual Means")
    plt.xlabel("Mean Value")
    plt.ylabel("Frequency")
    plt.title("Mean Values with Gaussian Fit (Frequency)")
    plt.legend()
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.savefig(saveinfo, dpi=300, bbox_inches='tight')
    plt.show()


Writing plotting.py


In [7]:
# Main orchestration cell
from config import train_episodes, test_episodes, sumoConfig, sumoBinary, gamma
from environment import Environment
from agent import AttentionPolicyAgent
from simulation import run_simulation_reinforce
from testing import test_fixed_policy
from plotting import plot_with_sem, plot_gaussian
import numpy as np

if __name__ == "__main__":
    # Imported here because only this entry point needs it.
    import os

    num_runs = 8
    # Single definition of the output folder, created up front: if it is
    # missing, the failure happens now and not in savefig() after every run
    # has finished training.
    results_dir = r"C:\Users\Administrator\Desktop\MySumo\stage 9(DRM with low congestion)\Results\PGAwoDRM"
    os.makedirs(results_dir, exist_ok=True)

    train_data = []
    test_data = []
    for i in range(num_runs):
        print(f"\n{'='*20} Run {i+1}/{num_runs} {'='*20}")
        env = Environment(sumoConfig, sumoBinary)
        # num_states=81 is 3**4: discretized_state emits one base-3 digit per
        # lane, so this assumes four distinct controlled lanes -- TODO confirm.
        # NOTE(review): alpha is the literal 0.01 here, not config.alpha (0.1).
        agent = AttentionPolicyAgent(
            num_states=81,
            num_actions=env.get_num_actions(),
            alpha=0.01,
            gamma=gamma
        )
        print("Starting Training Phase...")
        total_waiting_times_rl, waiting_times_per_vehicle_rl = run_simulation_reinforce(env, agent, train_episodes)
        train_data.append(waiting_times_per_vehicle_rl)
        print("\nStarting Testing Phase...")
        average_waiting_times = test_fixed_policy(env, agent, test_episodes)
        test_data.append(average_waiting_times)
        print(f"\n{'='*18} End of Run {i+1}/{num_runs} {'='*18}")

    # Rows are runs, columns are episodes; the Gaussian plots use the
    # per-episode mean across runs.
    train_data_array = np.array(train_data)
    train_mean_values = np.mean(train_data_array, axis=0)
    plot_with_sem(train_data_array, saveinfo=os.path.join(results_dir, "convergence.png"))
    plot_gaussian(train_mean_values, saveinfo=os.path.join(results_dir, "train_gaussian.png"), col='skyblue')
    test_data_array = np.array(test_data)
    test_mean_values = np.mean(test_data_array, axis=0)
    plot_gaussian(test_mean_values, saveinfo=os.path.join(results_dir, "test_gaussian.png"), col='orange')


KeyboardInterrupt: 