
# <div class="alert" align="CENTER" > <style> h1 {color:#3498DB;background:#BFC9CA} </style> <strong> TRAFFIC LIGHT CONTROL WITH DEEP Q-NETWORKS <BR> </strong></div>



<div class="alert" align="CENTER" >  <strong> GROUP 2 <BR><br> </strong>

`Group Members:` <br>Meenu Ramesh<br> Aardran Premakumar <br>Mohammed Nazim</div>

### <B><CENTER> IMPORTING THE LIBRARIES</CENTER></B>

In [None]:
import pandas as pd
import warnings
import matplotlib.pyplot as plt
import os
import sys
import traci
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import random
from collections import Counter

### <B><CENTER> FILTERING THE WARNINGS FROM OUTPUT</CENTER></B>

In [None]:
# Notebook-wide display setup: silence every Python warning, make pandas show
# all DataFrame columns, and switch matplotlib to the 'fivethirtyeight' theme.
# The warning filter is installed first so it also covers the calls below.
warnings.simplefilter('ignore')
pd.options.display.max_columns = None
plt.style.use('fivethirtyeight')

### <B><CENTER> SETTING UP SUMO ENVIRONMENT</CENTER></B>

In [None]:
# Locate the SUMO installation. A SUMO_HOME that is already exported is
# respected; the Windows path below is only a fallback for machines where the
# variable is not set. (Assigning it unconditionally made the check that
# follows unreachable.)
os.environ.setdefault("SUMO_HOME", r'C:\Program Files (x86)\Eclipse\Sumo')

if not os.environ.get("SUMO_HOME"):
    sys.exit("Please declare the 'SUMO_HOME' environment variable.")

# Make the Python tools shipped with SUMO importable. The membership test keeps
# sys.path free of duplicates when this cell is executed more than once.
# NOTE(review): `import traci` runs in the first cell, before this path is
# added, so that import presumably relies on traci being installed separately.
tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
if tools not in sys.path:
    sys.path.append(tools)

# Path to the SUMO GUI binary for visual simulation
sumoBinary = os.path.join(os.environ['SUMO_HOME'], 'bin', 'sumo-gui.exe')

# SUMO configuration file and the command line handed to traci.start().
# Raw string: '\l' is not a valid escape sequence, and newer Python versions
# warn about it in a normal string literal. The resulting value is unchanged.
sumoConfigPath = r'.\location'  # location of the SUMO configuration file
sumoCmd = [sumoBinary, "-c", sumoConfigPath, "--start", "--quit-on-end"]

### <B><CENTER> DQN MODEL ARCHITECTURE</CENTER></B>

In [None]:
# Q-network: a fully connected stack that maps a 10-feature state vector to
# one Q-value per action (4 actions). Three ReLU layers of 32 units feed a
# linear output layer; training minimises MSE with Adam (learning rate 1e-3).
model = Sequential([
    Dense(32, input_dim=10, activation='relu'),  # first layer fixes the state size (10)
    Dense(32, activation='relu'),
    Dense(32, activation='relu'),
    Dense(4, activation='linear'),  # one unbounded Q-value per action
])
model.compile(optimizer=tf.optimizers.Adam(learning_rate=0.001), loss='mse')

model.summary()

### <B><CENTER> APPLYING EPSILON-GREEDY POLICY</CENTER></B>

In [None]:
# Epsilon-greedy policy for action selection
def select_action(state, model, epsilon, num_actions=4):
    """Choose an action index with an epsilon-greedy policy.

    Args:
        state: Current state vector (any sequence `np.asarray` accepts).
        model: Object with a Keras-style `predict(batch, verbose=...)` method
            returning one row of Q-values per input row.
        epsilon: Exploration probability. A uniform draw from [0, 1) that is
            less than or equal to this value triggers a random action.
        num_actions: Number of discrete actions to sample from when
            exploring. Defaults to 4, the size of the network's output layer.

    Returns:
        int: Index of the selected action.
    """
    if np.random.rand() <= epsilon:
        return int(np.random.choice(num_actions))  # explore

    # verbose=0: this runs once per simulation step, and the default progress
    # bar would flood the notebook output.
    q_values = np.asarray(model.predict(np.asarray([state]), verbose=0))
    # The batch holds a single state, so row 0 carries its Q-values.
    return int(np.argmax(q_values[0]))

### <B><CENTER> INITIALIZING LANE IDS</CENTER></B>

In [None]:
# Lane IDs whose halting-vehicle counts are polled by get_state() and by the
# per-step queue-length metric in the training loop. 24 of the 28 entries
# share the prefix "293196396", the same ID get_state() uses for the traffic
# light; the other four carry different numeric prefixes.
# NOTE(review): the leading ':' looks like SUMO's naming for junction-internal
# lanes — confirm against the network file that these are the lanes where
# queues actually build up, rather than the approach lanes.
lanes = [
    ":2184943151_0_0",
    ":2586216133_0_0",
    ":293196396_0_0",
    ":293196396_1_0",
    ":293196396_2_0",
    ":293196396_3_0",
    ":293196396_16_0",
    ":293196396_17_0",
    ":293196396_4_0",
    ":293196396_5_0",
    ":293196396_6_0",
    ":293196396_7_0",
    ":293196396_18_0",
    ":293196396_19_0",
    ":293196396_8_0",
    ":293196396_9_0",
    ":293196396_10_0",
    ":293196396_11_0",
    ":293196396_20_0",
    ":293196396_21_0",
    ":293196396_12_0",
    ":293196396_13_0",
    ":293196396_14_0",
    ":293196396_15_0",
    ":293196396_22_0",
    ":293196396_23_0",
    ":295532041_0_0",
    ":5192644484_0_0"
]

### <B><CENTER> STATE FUNCTION</CENTER></B>

In [None]:
# Function to fetch the current state from the simulation
def get_state():
    """Query the simulation through TraCI and return a length-10 state vector.

    NOTE(review): the features assembled below (per-lane halting counts, one
    0/1 flag per signal character, and a phase-timing term) are stored in the
    local `state` but never returned. The function returns
    `np.random.rand(10)` instead, so every caller receives random values
    that do not depend on the simulation. The length 10 matches the
    `input_dim=10` of the Q-network; the assembled `state` has more than
    len(lanes) entries, so returning it would also require resizing the
    network input.

    Returns:
        numpy.ndarray: 10 samples produced by `np.random.rand`.
    """
    # Halting-vehicle count reported for each lane in the module-level `lanes`.
    queue_lengths = [traci.lane.getLastStepHaltingNumber(lane) for lane in lanes]
    signal_state = traci.trafficlight.getRedYellowGreenState("293196396") # Traffic light ID
    # Only an upper-case 'G' maps to 1; every other character maps to 0.
    # NOTE(review): confirm whether other "go" characters (e.g. lower-case 'g')
    # can occur in this signal string and should count as green as well.
    signal_state_vector = [1 if state == 'G' else 0 for state in signal_state]
    # NOTE(review): presumably getNextSwitch returns an absolute simulation
    # time rather than the time remaining; if so, this difference is not the
    # elapsed phase time — confirm against the TraCI documentation.
    elapsed_time = traci.trafficlight.getPhaseDuration("293196396") - traci.trafficlight.getNextSwitch("293196396")
    state = queue_lengths + signal_state_vector + [elapsed_time]
    # The assembled `state` is discarded here (see the NOTE in the docstring).
    return np.random.rand(10)

### <B><CENTER> REWARD FUNCTION</CENTER></B>

In [None]:

# Function to calculate the reward based on the current traffic situation
def get_reward():
    """Return the reward for the current simulation step.

    The reward is the negated sum of `traci.lane.getWaitingTime` over every
    lane ID reported by `traci.lane.getIDList()`, so more accumulated waiting
    yields a lower (more negative) reward.

    The previous implementation computed this value and then returned
    `np.random.rand()`, which made the reward independent of the simulation.

    Returns:
        The negated total waiting time (0 when no lane reports any waiting).
    """
    total_waiting_time = sum(
        traci.lane.getWaitingTime(lane_id) for lane_id in traci.lane.getIDList()
    )
    return -total_waiting_time

### <B><CENTER> REPLAY BUFFER</CENTER></B>

In [None]:
# Class to handle storing and sampling experiences for training
class ReplayBuffer:
    """Bounded FIFO store of experience tuples for replay training.

    Each entry is a `(state, action, reward, next_state, done)` tuple. The
    underlying `deque` is created with `maxlen=capacity`, so once the buffer
    is full every new entry evicts the oldest one.
    """

    def __init__(self, capacity):
        """Create an empty buffer holding at most `capacity` transitions."""
        self.buffer = deque(maxlen=capacity)

    def __len__(self):
        """Return the number of transitions currently stored."""
        return len(self.buffer)

    def add(self, state, action, reward, next_state, done):
        """Append one transition, evicting the oldest if the buffer is full."""
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        """Return `batch_size` distinct stored transitions chosen at random.

        Raises:
            ValueError: If `batch_size` exceeds the number of stored
                transitions (raised by `random.sample`).
        """
        return random.sample(self.buffer, batch_size)

### <B><CENTER> TRAINING FUNCTION</CENTER></B>

In [None]:
# Function defining a single training step using a mini-batch from the replay buffer
def train_model(model, target_model, replay_buffer, batch_size, gamma):
    """Run one DQN update on a mini-batch drawn from the replay buffer.

    For every sampled transition the Q-value of the action actually taken is
    replaced by the Bellman target
    `reward + gamma * max_a' Q_target(next_state, a') * (1 - done)`,
    and `model` is fitted towards the resulting Q-table for a single step.
    Q-values of the other actions are left at the model's own predictions,
    so they contribute no error.

    Args:
        model: Online network (Keras-style `predict` / `fit`), updated here.
        target_model: Network used to evaluate next-state Q-values.
        replay_buffer: Object whose `sample(batch_size)` returns a sequence of
            `(state, action, reward, next_state, done)` tuples.
        batch_size: Number of transitions to sample.
        gamma: Discount factor applied to the bootstrapped next-state value.

    Returns:
        None.
    """
    batch = replay_buffer.sample(batch_size)
    states, actions, rewards, next_states, dones = zip(*batch)

    # Explicit dtypes: actions are used as indices, and the done flags become
    # a 0/1 float mask so `1 - dones` is ordinary arithmetic.
    states = np.asarray(states)
    next_states = np.asarray(next_states)
    rewards = np.asarray(rewards, dtype=float)
    actions = np.asarray(actions, dtype=np.intp)
    dones = np.asarray(dones, dtype=float)

    # Bootstrapped value of the best next action, taken from the target
    # network. verbose=0 keeps the per-call progress bar out of the output.
    next_q_values = np.asarray(target_model.predict(next_states, verbose=0))
    max_next_q_values = np.max(next_q_values, axis=1)

    # Bellman target; terminal transitions (done == 1) get no bootstrap term.
    target_q_values = rewards + gamma * max_next_q_values * (1.0 - dones)

    # Overwrite only the entries of the actions that were taken. np.array
    # makes a writable copy of the prediction.
    q_values = np.array(model.predict(states, verbose=0))
    q_values[np.arange(len(batch)), actions] = target_q_values

    # batch_size=len(batch) makes this exactly one gradient step, whatever
    # the mini-batch size (fit would otherwise split it into chunks of 32).
    model.fit(states, q_values, batch_size=len(batch), epochs=1, verbose=0)

### <B><CENTER> HYPER PARAMETERS</CENTER></B>

In [None]:
# Settings for the training run.

# Run length: number of training episodes and the step cap for each one.
num_episodes, max_steps_per_episode = 10, 1000

# Epsilon-greedy exploration: starting rate, the floor it may not drop below,
# and the multiplicative decay factor.
epsilon, epsilon_min, epsilon_decay = 1.0, 0.01, 0.95

# Collects the total reward of each episode for later analysis.
rewards_during_dqn = list()

### <B><CENTER> SIMULATION</CENTER></B>

In [None]:
# Initialize the replay buffer and training variables
replay_buffer = ReplayBuffer(capacity=10000)
batch_size = 32  # Number of experiences to sample from buffer
gamma = 0.99  # Discount factor for future rewards

# clone_model copies the architecture only and creates freshly initialized
# weights, so the target network is synchronized with the online network
# explicitly before training starts.
target_model = tf.keras.models.clone_model(model)
target_model.set_weights(model.get_weights())

# Metrics collected across the whole run. They are created once, before the
# episode loop, so nothing gathered during training is reset part-way through.
average_queue_lengths = []  # Mean halting-vehicle count over `lanes`, one entry per step
actions_taken = []  # Every action selected, for the distribution plot
step_rewards = [[] for _ in range(num_episodes)]  # Per-step rewards, one list per episode

# Start the SUMO simulation using TraCI. The training itself runs inside
# try/finally so the connection is closed even if a step raises.
# (The previous version wrapped the whole episode loop in a
# `while getMinExpectedNumber() > 0` loop, which re-ran the complete training
# and reset the metrics for as long as the last episode ended with vehicles
# still in the network.)
traci.start(sumoCmd)
try:
    for episode in range(num_episodes):
        traci.load(["-c", sumoConfigPath, "--start", "--quit-on-end"])  # Restart the scenario for each episode
        state = get_state()
        total_reward = 0

        for step in range(max_steps_per_episode):
            action = select_action(state, model, epsilon)  # Select an action using the policy
            # NOTE(review): the selected action is only logged and stored in
            # the replay buffer; nothing here sends it to the traffic light,
            # so the policy does not influence the simulation. Applying it
            # needs a mapping from the 4 action indices to signal phases,
            # which is not defined anywhere in this notebook.

            traci.simulationStep()  # Advance the simulation

            next_state = get_state()
            reward = get_reward()

            # The episode ends at the step limit or as soon as the scenario
            # has no vehicles left to simulate.
            simulation_finished = traci.simulation.getMinExpectedNumber() <= 0
            done = simulation_finished or step == max_steps_per_episode - 1

            replay_buffer.add(state, action, reward, next_state, done)  # Store experience in buffer
            total_reward += reward
            step_rewards[episode].append(reward)  # Log rewards for analysis

            # Train on every step once the buffer holds at least one full batch
            if len(replay_buffer.buffer) >= batch_size:
                train_model(model, target_model, replay_buffer, batch_size, gamma)

            state = next_state  # Transition to the next state
            actions_taken.append(action)  # Log the action taken

            # Calculate average queue length for this step and log it
            current_queue_lengths = [traci.lane.getLastStepHaltingNumber(lane) for lane in lanes]
            average_queue_length = sum(current_queue_lengths) / len(lanes)
            average_queue_lengths.append(average_queue_length)

            if done:
                break

        # Periodically update the target model to stabilize learning.
        # NOTE(review): with num_episodes <= 10 this condition is only true
        # for episode 0 — consider a shorter interval.
        if episode % 10 == 0:
            target_model.set_weights(model.get_weights())

        # Update exploration rate
        epsilon = max(epsilon_min, epsilon_decay * epsilon)
        rewards_during_dqn.append(total_reward)  # Store total reward for this episode
finally:
    traci.close()  # Close the TraCI connection to SUMO

### <B><CENTER> REWARDS RANGE</CENTER></B>

In [None]:
# Report the smallest and largest episode total and per-step reward.
# step_rewards is a list of per-episode lists, so it is flattened first:
# min()/max() applied to the nested list compare whole episodes
# lexicographically (by their first differing step) and therefore do not
# find the global extremes. default=None keeps the report from raising when
# nothing was recorded.
all_step_rewards = [value for episode_rewards in step_rewards for value in episode_rewards]
print("Total rewards range:", min(rewards_during_dqn, default=None), max(rewards_during_dqn, default=None))
print("Step rewards range:", min(all_step_rewards, default=None), max(all_step_rewards, default=None))

### <B><CENTER> PLOT - REWARDS OVER TIME FOR EACH EPISODE</CENTER></B>

In [None]:
# Grid layout: `cols` plots per row and enough rows for every episode
# (at least one row so the figure can always be created).
cols = 5
rows = max(1, (num_episodes + cols - 1) // cols)
# Index of the first of the last 10 episodes. It is not used for the layout
# below (all episodes are plotted) and is kept only in case other cells
# reference it.
episodes = max(0, num_episodes - 10)

# Create a subplot for each episode to plot rewards over time
fig, axs = plt.subplots(rows, cols, figsize=(15, rows * 4), constrained_layout=True)
fig.suptitle('Rewards Over Time for Each Episode')

# plt.subplots returns a 1-D array for a single row and a 2-D array otherwise;
# a flat view lets episode k simply use slot k in row-major order.
axes_flat = np.atleast_1d(axs).ravel()

for ep in range(num_episodes):
    ax = axes_flat[ep]

    # Plot the rewards for the episode
    ax.plot(step_rewards[ep])
    ax.set_title(f'Episode {ep+1}')
    ax.set_xlabel('Step')
    ax.set_ylabel('Reward')
    ax.label_outer()  # Hide labels on inner plots for clarity

# Remove only the grid slots beyond the last episode. (Starting this loop at
# `episodes` deleted the plotted axes too — all of them when
# num_episodes <= 10.)
for unused_ax in axes_flat[num_episodes:]:
    fig.delaxes(unused_ax)
plt.show()

### <B><CENTER> PLOT - TOTAL REWARD OVER EPISODES</CENTER></B>

In [None]:
# Line chart of the total reward collected in each episode.
# With only y-values given, matplotlib uses 0..N-1 as the x-axis, i.e. the
# episode index.
plt.figure()
plt.plot(rewards_during_dqn)
plt.title('Total Reward Over Episodes')
plt.xlabel('Episode')
plt.ylabel('Total Reward')
plt.show()

### <B><CENTER> PIE CHART - DISTRIBUTION OF ACTIONS TAKEN</CENTER></B>

In [None]:
# Count how often each action was selected during the run.
action_counts = Counter(actions_taken)

# Build the label/size tuples directly from the counter, in ascending action
# order. Unpacking `zip(*sorted(...))` into two names raises ValueError when
# no action has been logged; this form simply yields two empty tuples.
actions = tuple(sorted(action_counts))
frequencies = tuple(action_counts[action] for action in actions)

# Visualize the action distribution in a pie chart
plt.figure(figsize=(8, 8))
plt.pie(frequencies, labels=actions, autopct='%1.2f%%', startangle=140)
plt.title('Distribution of Actions Taken')
plt.show()