# Red Light, Green Light: Dynamic Traffic Lights with RL
## By Erin Gregoire & Daniel Viola
### Spring 2025

In [None]:
import os
import random
import copy

import torch
import torch.optim as optim
from torch import nn
import numpy as np

# To whoever tests this: make sure SUMO is installed first; it is not trivial to set up.
from sumo_rl import SumoEnvironment


class DQNAgent(nn.Module):
    """Double-DQN agent with an online and a target Q-network.

    The online network (``self.layers``) is a small MLP that takes a
    2-feature state and outputs one Q-value per action. ``self.target_nn``
    is a copy of it that is only refreshed by ``update_target_network``.

    Args:
        action_space: Number of discrete actions (width of the output layer).
        initial_epsilon: Initial probability of picking a random action.
        min_epsilon: Lower bound for epsilon; stored for the caller, which
            performs the decay.
        epsilon_decay: Multiplicative decay factor; stored for the caller.
        lr: Learning rate of the Adam optimizer over the online network.
        gamma: Discount factor used in the TD target.
        device: Optional torch device (or device string). When omitted, CUDA
            is used if available, otherwise the CPU.
    """

    def __init__(
        self,
        action_space,
        initial_epsilon=1.0,
        min_epsilon=0.05,
        epsilon_decay=0.995,
        lr=0.00005,
        gamma=0.98,
        device=None,
    ):
        super().__init__()

        # Online Q-network: 2 state features in, one Q-value per action out.
        self.layers = nn.Sequential(
            nn.Linear(in_features=2, out_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=32),
            nn.ReLU(),
            nn.Linear(in_features=32, out_features=action_space),
        )

        # Target network starts as an exact copy of the online network.
        self.target_nn = copy.deepcopy(self.layers)

        self.epsilon = initial_epsilon
        self.min_epsilon = min_epsilon
        self.epsilon_decay = epsilon_decay
        self.gamma = gamma

        self.loss_fn = nn.MSELoss()
        # Only the online network is optimized; the target is synced by copy.
        self.optimizer = optim.Adam(self.layers.parameters(), lr=lr)

        # Fall back to the CPU instead of crashing on machines without CUDA.
        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)
        self.to(self.device)

    def forward(self, x):
        """Return the online network's Q-values for ``x``."""
        return self.layers(x)

    def training_step(self, batch):
        """Run one double-DQN gradient step on a sampled batch.

        Args:
            batch: Tuple ``(obs, action, reward, next_obs, terminated)`` of
                equally long sequences, as returned by
                ``ReplayBuffer.sample_batch``. Each observation must have the
                2 features the first linear layer expects.
        """
        obs, action, reward, next_obs, terminated = batch

        obs = torch.tensor(obs, dtype=torch.float32, device=self.device)
        action = torch.tensor(action, dtype=torch.long, device=self.device)
        reward = torch.tensor(reward, dtype=torch.float32, device=self.device)
        next_obs = torch.tensor(next_obs, dtype=torch.float32, device=self.device)
        terminated = torch.tensor(terminated, dtype=torch.float32, device=self.device)

        # Q-value of the action that was actually taken in each transition.
        q_values = self.layers(obs)
        q_pred = q_values.gather(1, action.unsqueeze(1)).squeeze(1)

        with torch.no_grad():
            # Double DQN: the online network picks the next action ...
            next_q_current = self.layers(next_obs)
            best_for_current = next_q_current.argmax(dim=1)

            # ... and the target network evaluates it.
            target_next_q = self.target_nn(next_obs)
            double_q_time = target_next_q.gather(1, best_for_current.unsqueeze(1)).squeeze(1)

            # No bootstrapping from terminal transitions.
            q_target = reward + (1 - terminated) * self.gamma * double_q_time

        loss = self.loss_fn(q_pred, q_target)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def update_target_network(self):
        """Copy the online network's weights into the target network."""
        self.target_nn.load_state_dict(self.layers.state_dict())

    def step(self, obs_vector):
        """Pick an action epsilon-greedily for a single observation.

        Args:
            obs_vector: One state with the 2 features the network expects.

        Returns:
            The chosen action index as a Python ``int``.
        """
        if random.random() < self.epsilon:
            # Explore: uniform random action.
            num_actions = self.layers[-1].out_features
            return random.randint(0, num_actions - 1)

        # Exploit: greedy action. No gradients are needed for inference.
        with torch.no_grad():
            obs_tensor = torch.tensor(
                obs_vector, dtype=torch.float32, device=self.device
            ).unsqueeze(0)
            q_values = self.layers(obs_tensor)
            return torch.argmax(q_values, dim=1).item()

    def winner_winner_chicken_dinner(self, obs):
        """Discretize an observation into ``(phase, bin, bin, ...)``.

        ``obs[0]`` is truncated to an int; every remaining entry is mapped to
        bin 0 (< 0.33), 1 (< 0.66) or 2 (otherwise).
        """
        phase = int(obs[0])
        queues = obs[1:]

        binned = []
        for q in queues:
            if q < 0.33:
                binned.append(0)
            elif q < 0.66:
                binned.append(1)
            else:
                binned.append(2)

        return (phase, *binned)


class ReplayBuffer:
    """Fixed-capacity ring buffer of transitions.

    Transitions are stored as ``(obs, action, reward, next_obs, terminated)``
    tuples. Once ``max_size`` entries exist, the oldest entry is overwritten.
    """

    def __init__(self, max_size):
        self.max_size = max_size
        self.buffer = []
        # Slot that the next write targets once the buffer is full.
        self.position = 0

    def update_buffer(self, obs, action, reward, next_obs, terminated):
        """Store one transition, overwriting the oldest one when full."""
        entry = (obs, action, reward, next_obs, terminated)
        if len(self.buffer) >= self.max_size:
            self.buffer[self.position] = entry
        else:
            self.buffer.append(entry)
        # The write cursor always advances, wrapping at capacity.
        self.position = (self.position + 1) % self.max_size

    def sample_batch(self, batch_size):
        """Draw ``batch_size`` distinct transitions uniformly at random.

        Returns:
            A 5-tuple of lists ``(obs, action, reward, next_obs, terminated)``
            where index ``i`` of every list belongs to the same transition.
        """
        picked = random.sample(self.buffer, batch_size)
        # Transpose the list of transitions into one list per field.
        return tuple([transition[field] for transition in picked] for field in range(5))



# IDs of the traffic signals that get their own agent and replay buffer.
# Each ID is used as a key into env.traffic_signals and into the per-signal
# observation/reward dicts, so it must match the IDs in the loaded network
# (presumably SUMO junction/cluster IDs from the net file -- TODO confirm).
traffic_ids = [
    "901708607", "901708581", "111358909",
    "cluster_11117046858_11117046859_111302133_6338690578_#1more",
    "cluster_11114466314_957732515",
    "cluster_11114466312_11114466316_111610488",
    "cluster_111338987_11959023487",
    "cluster_11117046870_11117046873_11117046875_957663394",
    "cluster_111569038_6338690548",
    "265842203",
    "cluster_11109129474_11117046864_11117046866_111408627_#1more",



    "cluster_12053387083_12053387276_12053387277_12067122981_#1more",
    "cluster_111452050_12053387076_12053387077_12053387078_#3more",
    "cluster_111621643_12054127196_12054127197_12054127198_#1more",
    "cluster_10906103986_11057916238_11113402178_11113402181_#1more",
    "cluster_11057916237_11109129500_11109129504_11117046877_#1more",
    "cluster_111610470_6338703466_6338703467_6338703468_#1more",
    "cluster_111454858_5844252914",
    "111571013",
    "cluster_111403667_1224821721_1224821747_2079395974_#5more",
    "cluster_111403670_5844252859_5844252860_5844252861_#1more",
    "111403672",
    "cluster_111403677_5844252867_5844252868_5844252869_#1more",
    "256402653",
    "111480199",
    "111418028"

]

# Maps each traffic-signal ID to a fixed-length list of five neighbor IDs.
# The literal "PAD" fills unused slots so every list has the same length.
# Some neighbor IDs do not appear in traffic_ids. This table is not
# referenced anywhere else in the visible code.
neighbor_map = {
    "111418028":['PAD', 'PAD', 'PAD', "PAD", "PAD"],

    "901708607": [
        "901708581",
        "cluster_111338987_11959023487",
        "255961265",
        "PAD",
        "PAD"
    ],
    "901708581": [
        "901708607",
        "111358909",
        "PAD",
        "PAD",
        "PAD"
    ],
    "111358909": [
        "901708581",
        "cluster_11114466314_957732515",
        "cluster_11117046858_11117046859_111302133_6338690578_#1more",
        "111393479",
        "255961265"
    ],
    "cluster_11117046858_11117046859_111302133_6338690578_#1more": [
        "111358909",
        "cluster_11114466312_11114466316_111610488",
        "4450761915",
        "111349628",
        "111389483"
    ],
    "cluster_11114466314_957732515": [
        "cluster_11117046870_11117046873_11117046875_957663394",
        "cluster_11114466312_11114466316_111610488",
        "111358909",
        "111619226",
        "957732519"
    ],
    "cluster_11114466312_11114466316_111610488": [
        "cluster_11117046858_11117046859_111302133_6338690578_#1more",
        "cluster_11114466314_957732515",
        "cluster_111569038_6338690548",
        "PAD",
        "PAD"
    ],
    "cluster_111338987_11959023487": [
        "901708607",
        "cluster_11117046870_11117046873_11117046875_957663394",
        "cluster_12053387083_12053387276_12053387277_12067122981_#1more",
        "PAD",
        "PAD"
    ],
    "cluster_11117046870_11117046873_11117046875_957663394": [
        "cluster_111338987_11959023487",
        "cluster_11114466314_957732515",
        "265842203",
        "3050626911",
        "957732513"
    ],
    "cluster_111569038_6338690548": [
        "cluster_11114466312_11114466316_111610488",
        "cluster_11109129474_11117046864_11117046866_111408627_#1more",
        "111569042",
        "PAD",
        "PAD"
    ],
    "265842203": [
        "cluster_111452050_12053387076_12053387077_12053387078_#3more",
        "cluster_12053387083_12053387276_12053387277_12067122981_#1more",
        "cluster_111621643_12054127196_12054127197_12054127198_#1more",
        "PAD",
        "PAD"
    ],
    "cluster_11109129474_11117046864_11117046866_111408627_#1more": [
        "cluster_111569038_6338690548",
        "cluster_111610470_6338703466_6338703467_6338703468_#1more",
        "111408609",
        "PAD",
        "PAD"
    ],
    "cluster_12053387083_12053387276_12053387277_12067122981_#1more": [
        "cluster_111454858_5844252914",
        "cluster_111452050_12053387076_12053387077_12053387078_#3more",
        "PAD",
        "PAD",
        "PAD"
    ],
    "cluster_111452050_12053387076_12053387077_12053387078_#3more": [
        "cluster_12053387083_12053387276_12053387277_12067122981_#1more",
        "265842203",
        "cluster_111621643_12054127196_12054127197_12054127198_#1more",
        "cluster_111403667_1224821721_1224821747_2079395974_#5more",
        "111571013"
    ],
    "cluster_111621643_12054127196_12054127197_12054127198_#1more": [
        "cluster_111452050_12053387076_12053387077_12053387078_#3more",
        "cluster_10906103986_11057916238_11113402178_11113402181_#1more",
        "265842203",
        "cluster_111403670_5844252859_5844252860_5844252861_#1more",
        "PAD"
    ],
    "cluster_10906103986_11057916238_11113402178_11113402181_#1more": [
        "cluster_111621643_12054127196_12054127197_12054127198_#1more",
        "cluster_11057916237_11109129500_11109129504_11117046877_#1more",
        "111403672",
        "PAD",
        "PAD"
    ],
    "cluster_11057916237_11109129500_11109129504_11117046877_#1more": [
        "cluster_10906103986_11057916238_11113402178_11113402181_#1more",
        "cluster_111610470_6338703466_6338703467_6338703468_#1more",
        "cluster_111403677_5844252867_5844252868_5844252869_#1more",
        "356366407",
        "111434392"
    ],
    "cluster_111610470_6338703466_6338703467_6338703468_#1more": [
        "cluster_11057916237_11109129500_11109129504_11117046877_#1more",
        "111480199",
        "cluster_11109129474_11117046864_11117046866_111408627_#1more",
        "111434392",
        "356366407"
    ],
    "cluster_111454858_5844252914": [
        "111571013",
        "cluster_12053387083_12053387276_12053387277_12067122981_#1more",
        "PAD",
        "PAD",
        "PAD"
    ],
    "111571013": [
        "cluster_111454858_5844252914",
        "cluster_111403667_1224821721_1224821747_2079395974_#5more",
        "cluster_111452050_12053387076_12053387077_12053387078_#3more",
        "PAD",
        "PAD"
    ],
    "cluster_111403667_1224821721_1224821747_2079395974_#5more": [
        "111571013",
        "cluster_111452050_12053387076_12053387077_12053387078_#3more",
        "cluster_111403670_5844252859_5844252860_5844252861_#1more",
        "PAD",
        "PAD"
    ],
    "cluster_111403670_5844252859_5844252860_5844252861_#1more": [
        "cluster_111403667_1224821721_1224821747_2079395974_#5more",
        "cluster_111621643_12054127196_12054127197_12054127198_#1more",
        "111403672",
        "PAD",
        "PAD"
    ],
    "111403672": [
        "cluster_111403670_5844252859_5844252860_5844252861_#1more",
        "cluster_10906103986_11057916238_11113402178_11113402181_#1more",
        "cluster_111403677_5844252867_5844252868_5844252869_#1more",
        "PAD",
        "PAD"
    ],
    "cluster_111403677_5844252867_5844252868_5844252869_#1more": [
        "111403672",
        "cluster_11057916237_11109129500_11109129504_11117046877_#1more",
        "256402653",
        "PAD",
        "PAD"
    ],
    "256402653": [
        "cluster_111403677_5844252867_5844252868_5844252869_#1more",
        "cluster_111610470_6338703466_6338703467_6338703468_#1more",
        "PAD",
        "PAD",
        "PAD"
    ],
    "111480199": [
        "cluster_111610470_6338703466_6338703467_6338703468_#1more",
        "PAD",
        "PAD",
        "PAD",
        "PAD"
    ]
}

# We no longer use the neighbors, as that approach didn't work well.




# Machine-specific fallback for the SUMO install location. setdefault keeps
# a SUMO_HOME that is already exported in the environment instead of
# overwriting it with a path that only exists on the original author's PC.
os.environ.setdefault(
    "SUMO_HOME",
    "C:/Users/alien/AppData/Local/Programs/Python/Python39/Lib/site-packages/sumo",
)
import pickle

# Multi-agent SUMO environment (single_agent=False) with the GUI enabled.
# NOTE(review): per the sumo-rl docs, num_seconds is the simulated time per
# episode and delta_time the simulated seconds between agent actions --
# confirm against the installed sumo-rl version.
env = SumoEnvironment(
    net_file="buffalo/buffalo_final.net.xml",
    route_file="buffalo/phase2.rou.xml",
    use_gui=True,
    num_seconds=2500,
    single_agent=False,
    delta_time=5,
    reward_fn='pressure'
)

def winner_winner_chicken_dinner(self, obs):
    """Discretize an observation into a tuple ``(phase, level, level, ...)``.

    ``self`` is unused; callers in this file pass ``None``. ``obs[0]`` is
    truncated to an int and every following entry is bucketed into level
    0 (below 0.33), 1 (below 0.66) or 2 (anything else).
    """
    phase = int(obs[0])
    levels = [
        0 if value < 0.33 else (1 if value < 0.66 else 2)
        for value in obs[1:]
    ]
    return tuple([phase] + levels)
# Load previously trained weights: a mapping from traffic-signal ID to a
# DQNAgent state dict.
# NOTE(review): pickle executes arbitrary code on load -- only open weight
# files from a trusted source.
saved_weights = []  # placeholder, replaced by the loaded object below
with open("ergregoi_dviola_ub_medical_campus_ddqn_weights", "rb") as weights_file:
    saved_weights = pickle.load(weights_file)

# One agent and one replay buffer per traffic signal.
agents = {}
buffers = {}
for traffic_id in traffic_ids:
    signal = env.traffic_signals[traffic_id]

    # Epsilon is pinned to 0 (no decay), so the agent always acts greedily.
    agent = DQNAgent(
        action_space=signal.num_green_phases,
        initial_epsilon=0,
        min_epsilon=0,
        epsilon_decay=1,
        lr=0.00005,
        gamma=.95,
    )

    # Signals without saved weights keep their random initialization.
    if traffic_id in saved_weights:
        agent.load_state_dict(saved_weights[traffic_id])
        agent.update_target_network()

    print(signal.num_green_phases, traffic_id)

    agents[traffic_id] = agent
    buffers[traffic_id] = ReplayBuffer(max_size=10000)


# Number of episodes run by whichever loop (training or evaluation) is active.
num_episodes  = 3
# Number of transitions sampled from a replay buffer per training step.
batch_size= 86
# The target networks are synced every this many episodes during training.
target_update_freq = 4
# Not referenced anywhere in the visible code.
eval_freq=5500
# Best average episode reward seen so far; starts very low so that the first
# training episode always counts as an improvement.
best_reward=-100000000000
# Per-signal countdown of steps for which the previous action is still held.
phase_locker ={traffic_id: 0 for traffic_id in traffic_ids }


# True runs the evaluation loop, False runs the training loop.
evaluation=True
# Last action chosen per signal; repeated while that signal is phase-locked.
prev_actions = {traffic_id: 0 for traffic_id in traffic_ids}
# Training mode: run episodes, fill the replay buffers, train the agents and
# checkpoint their weights after every episode.
if not evaluation:
    for episode in range(num_episodes):
        timestep_counter = 0
        print(episode)

        obs = env.reset()
        done = False
        total_reward = {tid: 0 for tid in traffic_ids}

        # Only the first two entries of each observation are used as state.
        phase = {tid: obs[tid][0] for tid in traffic_ids}
        queue = {tid: obs[tid][1] for tid in traffic_ids}

        while not done:
            timestep_counter += 1

            # Decay exploration once every 400 environment steps.
            if timestep_counter % 400 == 0:
                for traffic_id in traffic_ids:
                    agent = agents[traffic_id]
                    agent.epsilon = max(agent.min_epsilon, agent.epsilon * agent.epsilon_decay)

            actions = {}
            for traffic_id in traffic_ids:
                state = [phase[traffic_id], queue[traffic_id]]
                discretized_state = winner_winner_chicken_dinner(None, state)

                if phase_locker[traffic_id] <= 0:
                    # Pick a fresh action and hold it for the next 3 steps.
                    action = agents[traffic_id].step(discretized_state)
                    prev_actions[traffic_id] = action
                    phase_locker[traffic_id] = 3
                else:
                    action = prev_actions[traffic_id]
                    phase_locker[traffic_id] -= 1

                # Safety net: replace an out-of-range action by a random valid one.
                valid_actions = list(range(env.traffic_signals[traffic_id].num_green_phases))
                if action not in valid_actions:
                    action = random.choice(valid_actions)

                actions[traffic_id] = action

            next_obs, reward_, terminated, info = env.step(actions)

            phase_next = {tid: next_obs[tid][0] for tid in traffic_ids}
            queue_next = {tid: next_obs[tid][1] for tid in traffic_ids}

            for tid in traffic_ids:
                curr_state = [phase[tid], queue[tid]]
                next_state = [phase_next[tid], queue_next[tid]]

                disc_curr = winner_winner_chicken_dinner(None, curr_state)
                disc_next = winner_winner_chicken_dinner(None, next_state)

                buffers[tid].update_buffer(disc_curr, actions[tid], reward_[tid], disc_next, terminated[tid])
                total_reward[tid] += reward_[tid]

                # NOTE(review): gradient steps only happen in episodes whose
                # index is a multiple of 4; with num_episodes = 3 that is
                # episode 0 only. Confirm this gating is intended.
                if len(buffers[tid].buffer) >= batch_size and episode % 4 == 0:
                    batch = buffers[tid].sample_batch(batch_size)
                    agents[tid].training_step(batch)

            obs = next_obs
            phase = phase_next
            queue = queue_next
            done = terminated.get("__all__", False)

        # Sync the target networks every target_update_freq episodes.
        if (episode + 1) % target_update_freq == 0:
            for t in traffic_ids:
                agents[t].update_target_network()

        # Keep a separate checkpoint of the best episode so far.
        avg_r = sum(total_reward.values()) / len(total_reward)
        if avg_r > best_reward:
            best_reward = avg_r
            w = {t: agents[t].state_dict() for t in traffic_ids}
            with open("dqn_traffic_agents_best.pkl3", "wb") as ff:
                pickle.dump(w, ff)

        # Always checkpoint the latest weights. The original dumped `w` here,
        # which is empty unless this episode was also the best one.
        w2 = {t: agents[t].state_dict() for t in traffic_ids}
        with open("dqn_traffic_agents.pkl3", "wb") as ff:
            pickle.dump(w2, ff)

# Evaluation mode: roll out the loaded agents without any learning and print
# the average total reward per traffic signal after every episode.
if evaluation:
    prev_actions = dict.fromkeys(traffic_ids, 0)

    for episode in range(num_episodes):
        timestep_counter = 0
        print(episode)
        obs = env.reset()
        total_reward = dict.fromkeys(traffic_ids, 0)

        # Only the first two entries of each observation are used as state.
        phase = {tid: obs[tid][0] for tid in traffic_ids}
        queue = {tid: obs[tid][1] for tid in traffic_ids}

        done = False
        while not done:
            timestep_counter += 1

            actions = {}
            for traffic_id in traffic_ids:
                discretized_state = winner_winner_chicken_dinner(
                    None, [phase[traffic_id], queue[traffic_id]]
                )

                if phase_locker[traffic_id] > 0:
                    # Still locked: repeat the previous action.
                    action = prev_actions[traffic_id]
                    phase_locker[traffic_id] -= 1
                else:
                    # Lock expired: choose again and hold for 3 more steps.
                    action = agents[traffic_id].step(discretized_state)
                    prev_actions[traffic_id] = action
                    phase_locker[traffic_id] = 3

                # Safety net: replace an out-of-range action by a random valid one.
                valid_actions = range(env.traffic_signals[traffic_id].num_green_phases)
                if action not in valid_actions:
                    action = random.choice(valid_actions)

                actions[traffic_id] = action

            next_obs, reward_, terminated, info = env.step(actions)

            for tid in traffic_ids:
                total_reward[tid] += reward_[tid]

            obs = next_obs
            phase = {tid: next_obs[tid][0] for tid in traffic_ids}
            queue = {tid: next_obs[tid][1] for tid in traffic_ids}
            done = terminated.get("__all__", False)

        avg_r = sum(total_reward.values()) / len(total_reward)
        print(avg_r)


