In [1]:
# Install SUMO and other required packages

!apt-get update -qq

!apt-get install -y sumo sumo-tools sumo-doc

!pip install stable-baselines3 --quiet

!pip install traci

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  binfmt-support fastjar fonts-roboto-unhinted gdal-data jarwrapper
  javascript-common libarmadillo9 libarpack2 libcfitsio8 libcharls2 libdap25
  libdapclient6v5 libepsilon1 libfox-1.6-0 libfreexl1 libfyba0 libgdal26
  libgeos-3.8.0 libgeos-c1v5 libgeotiff5 libglu1-mesa libhdf4-0-alt
  libjs-jquery libjs-openlayers libjs-underscore libkmlbase1 libkmldom1
  libkmlengine1 libminizip1 libmysqlclient21 libnetcdf15 libodbc1 libogdi4.1
  libpoppler97 libpq5 libproj15 libqhull7 libspatialite7 libsuperlu5
  liburiparser1 libxerces-c3.2 mysql-common odbcinst odbcinst1debian2
  poppler-data proj-bin proj-data
Suggested packages:
  apache2 | lighttpd | httpd geotiff-bin gdal-bin libgeotiff-epsg libhdf4-doc
  libhdf4-alt-dev hdf4-tools libmyodbc odbc-postgresql tdsodbc unixodbc-bin
  ogdi-bin poppler-utils ghostscript fonts-japanese-mincho
  | fonts-

In [2]:
import numpy as np

import random

from collections import deque


import os

import sys

import traci

import matplotlib.pyplot as plt

In [3]:
# Set SUMO_HOME environment variable

os.environ['SUMO_HOME'] = '/usr/share/sumo'



# Import traci module

if 'SUMO_HOME' in os.environ:

    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')

    sys.path.append(tools)

else:

    sys.exit("Please declare environment variable 'SUMO_HOME'")

In [4]:
!sumo -c my_simulation.sumocfg

Error: Could not access configuration 'my_simulation.sumocfg'.
Quitting (on error).


In [5]:
import os
import subprocess
import sys
import traci

def create_sumo_files():
    # Create nodes file with traffic lights
    with open('my_nodes.nod.xml', 'w') as f:
        f.write('''<nodes>
    <node id="1" x="0" y="0" type="priority"/>
    <node id="2" x="100" y="0" type="priority"/>
    <node id="3" x="25" y="50" type="priority"/>
    <node id="4" x="25" y="-50" type="priority"/>
    <node id="5" x="75" y="50" type="priority"/>
    <node id="6" x="75" y="-50" type="priority"/>
    <node id="7" x="25" y="0" type="traffic_light"/>
    <node id="8" x="75" y="0" type="traffic_light"/>
                
    <node id="e1" x="0" y="25" type="priority"/>
    <node id="e2" x="25" y="0" type="priority"/>
</nodes>''')

    # Create edges file
    with open('my_edges.edg.xml', 'w') as f:
        f.write('''<edges>
    <edge id="1to7" from="1" to="7" priority="1" numLanes="2" speed="13.89"/>
    <edge id="7to8" from="7" to="8" priority="1" numLanes="2" speed="13.89"/>
    <edge id="8to2" from="8" to="2" priority="1" numLanes="2" speed="13.89"/>
    <edge id="2to8" from="2" to="8" priority="1" numLanes="2" speed="13.89"/>
    <edge id="8to7" from="8" to="7" priority="1" numLanes="2" speed="13.89"/>
    <edge id="7to1" from="7" to="1" priority="1" numLanes="2" speed="13.89"/>
    <edge id="3to7" from="3" to="7" priority="1" numLanes="2" speed="13.89"/>
    <edge id="7to4" from="7" to="4" priority="1" numLanes="2" speed="13.89"/>
    <edge id="4to7" from="4" to="7" priority="1" numLanes="2" speed="13.89"/>
    <edge id="7to3" from="7" to="3" priority="1" numLanes="2" speed="13.89"/>
    <edge id="5to8" from="5" to="8" priority="1" numLanes="2" speed="13.89"/>
    <edge id="8to6" from="8" to="6" priority="1" numLanes="2" speed="13.89"/>
    <edge id="6to8" from="6" to="8" priority="1" numLanes="2" speed="13.89"/>
    <edge id="8to5" from="8" to="5" priority="1" numLanes="2" speed="13.89"/>
                
    <edge id="e1to7" from="e1" to="7" priority="1" numLanes="1" speed="20.0"/>
    <edge id="7toe1" from="7" to="e1" priority="1" numLanes="1" speed="20.0"/>
</edges>''')

    # Generate network file using netconvert
    subprocess.run(['netconvert', '--node-files=my_nodes.nod.xml', '--edge-files=my_edges.edg.xml', '--output-file=my_network.net.xml', '--tls.guess', '--tls.default-type', 'static'])

    # Create routes file with more vehicles
    with open('my_routes.rou.xml', 'w') as f:
        f.write('''<routes>
                
     <!-- Vehicles -->     
                      
    <vType id="car" accel="0.8" decel="4.5" sigma="0.5" length="5" minGap="2.5" maxSpeed="16.67" guiShape="passenger"/>
    <vType id="emergency_car" accel="1.2" decel="6.0" sigma="0.5" length="6" minGap="2.5" maxSpeed="25.0" guiShape="emergency"/>

    <!-- Common Routes -->
                           
    <route id="route0" edges="1to7 7to8 8to2"/>
    <route id="route1" edges="2to8 8to7 7to1"/>
    <route id="route2" edges="3to7 7to4"/>
    <route id="route3" edges="4to7 7to3"/>
    <route id="route4" edges="5to8 8to6"/>
    <route id="route5" edges="6to8 8to5"/>
                

    <!-- Emergency Routes -->
                        
    <route id="emergency_route1" edges="e1to7 7to8 8to2"/>
    <route id="emergency_route2" edges=" 2to8 8to7 7toe1"/>  
              


    <!-- Complex Routes -->
                               
    <route id="complex_route1" edges="1to7 7to4"/>
    <route id="complex_route2" edges="1to7 7to3"/>
    <route id="complex_route3" edges="1to7 7to8 8to6"/>  
    <route id="complex_route4" edges="1to7 7to8 8to5"/>
    <route id="complex_route5" edges="1to7 7to8 8to2"/>         
    
    <route id="complex_route6" edges="2to8 8to6"/>'
    <route id="complex_route7" edges="2to8 8to5"/>
    <route id="complex_route8" edges="2to8 8to7 7to4"/>
    <route id="complex_route9" edges="2to8 8to7 7to3"/>
    <route id="complex_route10" edges="2to8 8to7 7to1"/>

    <route id="complex_route11" edges="3to7 7to8 8to6"/>
    <route id="complex_route12" edges="3to7 7to8 8to5"/>
    <route id="complex_route13" edges="3to7 7to8 8to2"/>
    <route id="complex_route14" edges="3to7 7to4"/>

    <route id="complex_route15" edges="4to7 7to8 8to6"/>
    <route id="complex_route16" edges="4to7 7to8 8to5"/>
    <route id="complex_route17" edges="4to7 7to8 8to2"/>
    <route id="complex_route18" edges="4to7 7to3"/>

    <route id="complex_route19" edges="5to8 8to2"/>
    <route id="complex_route20" edges="5to8 8to7 7to4"/>
    <route id="complex_route21" edges="5to8 8to7 7to3"/>
    <route id="complex_route22" edges="5to8 8to6"/>

    <route id="complex_route23" edges="6to8 8to2"/>
    <route id="complex_route24" edges="6to8 8to7 7to4"/>
    <route id="complex_route25" edges="6to8 8to7 7to3"/>
    <route id="complex_route26" edges="6to8 8to5"/>
 
                
    <!-- Flows -->
                            
    <flow id="flow0" type="car" route="route0" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="flow1" type="car" route="route1" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="flow2" type="car" route="route2" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="flow3" type="car" route="route3" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="flow4" type="car" route="route4" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="flow5" type="car" route="route5" begin="0" end="3600" vehsPerHour="50"/>
            
    <flow id="emergency_flow1" type="emergency_car" route="emergency_route1" begin="0" end="3600" vehsPerHour="1"/>
    <flow id="emergency_flow2" type="emergency_car" route="emergency_route2" begin="0" end="3600" vehsPerHour="2"/>
                
    <flow id="complex_flow1" type="car" route="complex_route1" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow2" type="car" route="complex_route2" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow3" type="car" route="complex_route3" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow4" type="car" route="complex_route4" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow5" type="car" route="complex_route5" begin="0" end="3600" vehsPerHour="50"/>
                
    <flow id="complex_flow6" type="car" route="complex_route6" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow7" type="car" route="complex_route7" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow8" type="car" route="complex_route8" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow9" type="car" route="complex_route9" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow10" type="car" route="complex_route10" begin="0" end="3600" vehsPerHour="50"/>
                
    <flow id="complex_flow11" type="car" route="complex_route11" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow12" type="car" route="complex_route12" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow13" type="car" route="complex_route13" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow14" type="car" route="complex_route14" begin="0" end="3600" vehsPerHour="50"/>
    
    <flow id="complex_flow15" type="car" route="complex_route15" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow16" type="car" route="complex_route16" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow17" type="car" route="complex_route17" begin="0" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow18" type="car" route="complex_route18" begin="0" end="3600" vehsPerHour="50"/>
                
    <flow id="complex_flow19" type="car" route="complex_route19" begin="400" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow20" type="car" route="complex_route20" begin="400" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow21" type="car" route="complex_route21" begin="800" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow22" type="car" route="complex_route22" begin="800" end="3600" vehsPerHour="50"/>
    
    <flow id="complex_flow23" type="car" route="complex_route23" begin="1500" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow24" type="car" route="complex_route24" begin="1500" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow25" type="car" route="complex_route25" begin="1500" end="3600" vehsPerHour="50"/>
    <flow id="complex_flow26" type="car" route="complex_route26" begin="1500" end="3600" vehsPerHour="50"/>
                        
</routes>''')

    # Create SUMO configuration file
    with open('my_simulation.sumocfg', 'w') as f:
        f.write('''<configuration>
    <input>
        <net-file value="my_network.net.xml"/>
        <route-files value="my_routes.rou.xml"/>
    </input>
    <time>
        <begin value="0"/>
        <end value="3600"/>
    </time>
    <report>
        <verbose value="true"/>
        <no-step-log value="true"/>
    </report>
</configuration>''')


def run_sumo_simulation():
    # Run SUMO simulation
    subprocess.run(['sumo', '-c', 'my_simulation.sumocfg'])


if __name__ == "__main__":
    # Check if SUMO_HOME is set
    if 'SUMO_HOME' not in os.environ:
        print("Please set the SUMO_HOME environment variable")
        exit(1)

    # Add SUMO tools to Python path
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)

    # Create SUMO files
    create_sumo_files()
    print("SUMO files created successfully.")

    # Run SUMO simulation
    print("Running SUMO simulation...")
    run_sumo_simulation()
    print("SUMO simulation completed.")

Success.
SUMO files created successfully.
Running SUMO simulation...
Loading configuration ... done.
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Simulation ended at time: 3600.00
Reason: The final simulation step has been reached.
Performance: 
 Duration: 572ms
 Real time factor: 6293.71
 UPS: 231270.979021
Vehicles: 
 Inserted: 1086 (Loaded: 1491)
 Running: 33
 Waiting: 405
Teleports: 11 (Jam: 1, Yield: 9, Wrong Lane: 1)

SUMO simulation completed.




In [6]:
import traci
import random
import numpy as np

def calculate_reward(state):
    avg_speed = state['average_speed']
    total_queue = sum(state['queue_lengths'].values())
    return avg_speed - 0.1 * total_queue

def discretize_state(state):
    def categorize(value, ranges):
        for i, r in enumerate(ranges):
            if value <= r:
                return i
        return len(ranges)

    discretized = {
        'traffic_light_phases': state['traffic_light_phases'],
        'vehicle_count': categorize(state['vehicle_count'], [10, 30, 40]),
        'average_speed': categorize(state['average_speed'], [0.5, 1, 2]),
        'queue_lengths': {edge: categorize(length, [0, 5, 8]) for edge, length in state['queue_lengths'].items()}
    }
    return tuple(discretized.values())

def generate_history(steps=3600):
    history = []
    traci.start(['sumo', '-c', 'my_simulation.sumocfg'])
    
    total_waiting_time = 0
    total_travel_time = 0
    total_distance_travelled = 0
    total_fuel_consumption = 0
    total_co2_emission = 0
    
    for step in range(steps):
        current_state = {
            'time_step': step,
            'traffic_light_phases': {
                '7': traci.trafficlight.getPhase('7'),
                '8': traci.trafficlight.getPhase('8')
            },
            'vehicle_count': traci.vehicle.getIDCount(),
            'average_speed': traci.vehicle.getIDCount() > 0 and sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / traci.vehicle.getIDCount() or 0,
            'queue_lengths': {
                edge: traci.edge.getLastStepHaltingNumber(edge) for edge in 
                ['1to7', '3to7', '5to8', 'e1to7', '7toe1', '2to8', '8to2', '8to7', '7to1', '7to4', '4to7', '7to3', '8to6', '6to8', '8to5']
            }
        }
        
        action = {
            '7': random.randint(0, 3),
            '8': random.randint(0, 3)
        }
        traci.trafficlight.setPhase('7', action['7'])
        traci.trafficlight.setPhase('8', action['8'])
        
        traci.simulationStep()
        
        next_state = {
            'time_step': step + 1,
            'traffic_light_phases': {
                '7': traci.trafficlight.getPhase('7'),
                '8': traci.trafficlight.getPhase('8')
            },
            'vehicle_count': traci.vehicle.getIDCount(),
            'average_speed': traci.vehicle.getIDCount() > 0 and sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / traci.vehicle.getIDCount() or 0,
            'queue_lengths': {
                edge: traci.edge.getLastStepHaltingNumber(edge) for edge in 
                ['1to7', '3to7', '5to8', 'e1to7', '7toe1', '2to8', '8to2', '8to7', '7to1', '7to4', '4to7', '7to3', '8to6', '6to8', '8to5']
            }
        }
        
        reward = calculate_reward(next_state)
        current_state = discretize_state(current_state)
        next_state = discretize_state(next_state)
        history.append((current_state, action, reward, next_state))
        
        # Update metrics
        total_waiting_time += sum(traci.vehicle.getWaitingTime(v) for v in traci.vehicle.getIDList())
        total_travel_time += sum(traci.vehicle.getAccumulatedWaitingTime(v) for v in traci.vehicle.getIDList())
        total_distance_travelled += sum(traci.vehicle.getDistance(v) for v in traci.vehicle.getIDList())
        total_fuel_consumption += sum(traci.vehicle.getFuelConsumption(v) for v in traci.vehicle.getIDList())
        total_co2_emission += sum(traci.vehicle.getCO2Emission(v) for v in traci.vehicle.getIDList())
    
    
    
    # Calculate final metrics
    metrics = {
        'average_waiting_time': total_waiting_time / steps,
        'average_travel_time': total_travel_time / steps,
        'average_speed': total_distance_travelled / total_travel_time if total_travel_time > 0 else 0,
        'total_distance_travelled': total_distance_travelled,
        'average_fuel_consumption': total_fuel_consumption / steps,
        'average_co2_emission': total_co2_emission / steps,
        'throughput': len(set(v for state, _, _, _ in history for v in traci.simulation.getArrivedIDList()))
    }
    
    traci.close()
    return history, metrics

def run_sumo_simulation():
    history, metrics = generate_history()
    return history, metrics

#history, metrics = run_sumo_simulation()
#print("Simulation metrics:", metrics)

In [7]:
#traci.close()

In [8]:
import traci
import random
import numpy as np

# Q-learning parameters
LEARNING_RATE = 0.1
DISCOUNT_FACTOR = 0.9
EPSILON = 0.1
NUM_EPISODES = 100

def discretize_state(state):
    def categorize(value, ranges):
        for i, r in enumerate(ranges):
            if value <= r:
                return i
        return len(ranges)

    discretized = (
        state['traffic_light_phases']['7'],
        state['traffic_light_phases']['8'],
        categorize(state['vehicle_count'], [10, 30, 40]),
        categorize(state['average_speed'], [0.5, 1, 2]),
        tuple(categorize(length, [0, 5, 8]) for length in state['queue_lengths'].values())
    )
    return discretized

def calculate_reward(state):
    avg_speed = state['average_speed']
    total_queue = sum(state['queue_lengths'].values())
    return avg_speed - 0.1 * total_queue

def q_learning(num_episodes=NUM_EPISODES, steps_per_episode=3600):
    Q = {}
    metrics_history = []

    for episode in range(num_episodes):
        try:
            traci.start(['sumo', '-c', 'my_simulation.sumocfg'])
            state = get_state()
            state = discretize_state(state)
            
            total_reward = 0
            
            for step in range(steps_per_episode):
                if random.random() < EPSILON:
                    action = random.choice([0, 1, 2, 3])
                else:
                    action = np.argmax(Q.get(state, np.zeros(4)))
                
                take_action(action)
                traci.simulationStep()
                
                next_state = get_state()
                reward = calculate_reward(next_state)
                next_state = discretize_state(next_state)
                
                if state not in Q:
                    Q[state] = np.zeros(4)
                
                Q[state][action] = Q[state][action] + LEARNING_RATE * (reward + DISCOUNT_FACTOR * np.max(Q.get(next_state, np.zeros(4))) - Q[state][action])
                
                state = next_state
                total_reward += reward
            
            metrics = calculate_metrics()
            metrics['episode'] = episode
            metrics['total_reward'] = total_reward
            metrics_history.append(metrics)
            
            print(f"Episode {episode} completed. Total reward: {total_reward}")
        
        except traci.exceptions.FatalTraCIError as e:
            print(f"TraCI error in episode {episode}: {e}")
        finally:
            traci.close()
    
    return Q, metrics_history

def get_state():
    return {
        'traffic_light_phases': {
            '7': traci.trafficlight.getPhase('7'),
            '8': traci.trafficlight.getPhase('8')
        },
        'vehicle_count': traci.vehicle.getIDCount(),
        'average_speed': traci.vehicle.getIDCount() > 0 and sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / traci.vehicle.getIDCount() or 0,
        'queue_lengths': {
            edge: traci.edge.getLastStepHaltingNumber(edge) for edge in 
            ['1to7', '3to7', '5to8', 'e1to7', '7toe1', '2to8', '8to2', '8to7', '7to1', '7to4', '4to7', '7to3', '8to6', '6to8', '8to5']
        }
    }

def take_action(action):
    if action == 0:
        traci.trafficlight.setPhase('7', (traci.trafficlight.getPhase('7') + 1) % 4)
    elif action == 1:
        traci.trafficlight.setPhase('8', (traci.trafficlight.getPhase('8') + 1) % 4)
    elif action == 2:
        traci.trafficlight.setPhase('7', (traci.trafficlight.getPhase('7') + 1) % 4)
        traci.trafficlight.setPhase('8', (traci.trafficlight.getPhase('8') + 1) % 4)
    # Action 3 does nothing (keeps current phases)

def calculate_metrics():
    return {
        'average_waiting_time': sum(traci.vehicle.getAccumulatedWaitingTime(v) for v in traci.vehicle.getIDList()) / max(len(traci.vehicle.getIDList()), 1),
        'average_speed': sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / max(len(traci.vehicle.getIDList()), 1),
        'total_queue_length': sum(traci.edge.getLastStepHaltingNumber(edge) for edge in traci.edge.getIDList())
    }

"""
# Run Q-learning
Q, metrics_history = q_learning()

# Print final metrics
final_metrics = metrics_history[-1]
print("Final metrics after Q-learning:")
print(f"Average waiting time: {final_metrics['average_waiting_time']:.2f} seconds")
print(f"Average speed: {final_metrics['average_speed']:.2f} m/s")
print(f"Total queue length: {final_metrics['total_queue_length']}")
print(f"Total reward: {final_metrics['total_reward']:.2f}")
"""


'\n# Run Q-learning\nQ, metrics_history = q_learning()\n\n# Print final metrics\nfinal_metrics = metrics_history[-1]\nprint("Final metrics after Q-learning:")\nprint(f"Average waiting time: {final_metrics[\'average_waiting_time\']:.2f} seconds")\nprint(f"Average speed: {final_metrics[\'average_speed\']:.2f} m/s")\nprint(f"Total queue length: {final_metrics[\'total_queue_length\']}")\nprint(f"Total reward: {final_metrics[\'total_reward\']:.2f}")\n'

Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 57.12s
 TraCI-Duration: 55.90s
 Real time factor: 63.0252
 UPS: 2195.430672
Vehicles: 
 Inserted: 663 (Loaded: 1491)
 Running: 39
 Waiting: 828
Teleports: 67 (Jam: 7, Yield: 56, Wrong Lane: 4)

Simulation metrics: {'average_waiting_time': 4226.389444444444, 'average_travel_time': 2583.2052777777776, 'average_speed': 0.4487425447642984, 'total_distance_travelled': 4173098.795994839, 'average_fuel_consumption': 36.47448543241946, 'average_co2_emission': 84847.17495936058, 'throughput': 0}


Episode 99 completed. Total reward: -5400.5915304935415
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 12.84s
 TraCI-Duration: 12.00s
 Real time factor: 280.461
 UPS: 9656.746650
Vehicles: 
 Inserted: 782 (Loaded: 1491)
 Running: 27
 Waiting: 709
Teleports: 36 (Jam: 8, Yield: 24, Wrong Lane: 4)

Final metrics after Q-learning:
Average waiting time: 79.22 seconds
Average speed: 0.58 m/s
Total queue length: 18
Total reward: -5400.59

roughput': 0}

# DQN

In [None]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import traci

# Hyperparameters
EPSILON = 1.0  # Exploration rate
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.995
DISCOUNT_FACTOR = 0.99
LEARNING_RATE = 0.005
NUM_EPISODES = 500
BATCH_SIZE = 64
TARGET_UPDATE_FREQ = 5
MAX_MEMORY_SIZE = 10000
CONVERGENCE_WINDOW = 100
CONVERGENCE_THRESHOLD = 0.01
reward_window = deque(maxlen=CONVERGENCE_WINDOW)

# Check if CUDA is available and use GPU if possible
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# DQN Model (Neural Network for Q-function approximation)
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

# Replay Buffer
class ReplayBuffer:
    def __init__(self):
        self.memory = deque(maxlen=MAX_MEMORY_SIZE)
    
    def push(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def sample(self):
        return random.sample(self.memory, BATCH_SIZE)
    
    def size(self):
        return len(self.memory)

# Discretize State Function
def discretize_state(state):
    def categorize(value, ranges):
        for i, r in enumerate(ranges):
            if value <= r:
                return i
        return len(ranges)

    discretized = [
        state['traffic_light_phases']['7'],
        state['traffic_light_phases']['8'],
        categorize(state['vehicle_count'], [10, 30, 40]),
        categorize(state['average_speed'], [0.5, 1, 2])
    ]
    discretized.extend([categorize(length, [0, 5, 8]) for length in state['queue_lengths'].values()])
    return discretized

# Reward Calculation Function (using full state)
def calculate_reward(state):
    avg_speed = state['average_speed']
    total_queue = sum(state['queue_lengths'].values())
    waiting_vehicles = sum(traci.edge.getLastStepHaltingNumber(edge) for edge in state['queue_lengths'])
    return avg_speed - 0.01 * total_queue - 0.2 * waiting_vehicles

# Get State Function
def get_state():
    return {
        'traffic_light_phases': {
            '7': traci.trafficlight.getPhase('7'),
            '8': traci.trafficlight.getPhase('8')
        },
        'vehicle_count': traci.vehicle.getIDCount(),
        'average_speed': traci.vehicle.getIDCount() > 0 and sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / traci.vehicle.getIDCount() or 0,
        'queue_lengths': {
            edge: traci.edge.getLastStepHaltingNumber(edge) for edge in 
            ['1to7', '3to7', '5to8', 'e1to7', '7toe1', '2to8', '8to2', '8to7', '7to1', '7to4', '4to7', '7to3', '8to6', '6to8', '8to5']
        }
    }

def take_action(action):
    if action == 0:
        traci.trafficlight.setPhase('7', (traci.trafficlight.getPhase('7') + 1) % 4)
    elif action == 1:
        traci.trafficlight.setPhase('8', (traci.trafficlight.getPhase('8') + 1) % 4)
    elif action == 2:
        traci.trafficlight.setPhase('7', (traci.trafficlight.getPhase('7') + 1) % 4)
        traci.trafficlight.setPhase('8', (traci.trafficlight.getPhase('8') + 1) % 4)
    # Action 3 does nothing (keeps current phases)

def calculate_metrics():
    return {
        'average_waiting_time': sum(traci.vehicle.getAccumulatedWaitingTime(v) for v in traci.vehicle.getIDList()) / max(len(traci.vehicle.getIDList()), 1),
        'average_speed': sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / max(len(traci.vehicle.getIDList()), 1),
        'total_queue_length': sum(traci.edge.getLastStepHaltingNumber(edge) for edge in traci.edge.getIDList())
    }

# Deep Q-Learning Algorithm
def deep_q_learning(num_episodes=NUM_EPISODES, steps_per_episode=3600):
    input_dim = 5  # Dimension of the state space after discretization
    input_dim = 2 + 1 + 1 + 15  # 2 traffic lights, vehicle count, average speed, and queue lengths
    output_dim = 4  # Number of possible actions (0-3)
    CONVERGENCE_WINDOW = 100
    CONVERGENCE_THRESHOLD = 0.01
    reward_window = deque(maxlen=CONVERGENCE_WINDOW)
    prev_avg_reward = float('-inf')
    reward_mean = 0
    reward_std = 0
    episode_count = 0
    losses  = []
    
    # Initialize Q-networks and optimizer
    policy_net = DQN(input_dim, output_dim).to(device)  # Move model to device (GPU/CPU)
    target_net = DQN(input_dim, output_dim).to(device)  # Move model to device (GPU/CPU)
    target_net.load_state_dict(policy_net.state_dict())  # Initialize target network
    optimizer = optim.Adam(policy_net.parameters(), lr=LEARNING_RATE)
    
    # Experience replay buffer
    replay_buffer = ReplayBuffer()

    # Metrics history
    metrics_history = []

    global EPSILON
    
    for episode in range(num_episodes):
        try:
            traci.start(['sumo', '-c', 'my_simulation.sumocfg'])
            state = get_state()  # Get the full state
            discretized_state = discretize_state(state)  # Get the discretized state
            total_reward = 0
            done = False

            for step in range(steps_per_episode):
                # Select action using epsilon-greedy policy
                if random.random() < EPSILON:
                    action = random.choice([0, 1, 2, 3])
                else:
                    with torch.no_grad():
                        action = torch.argmax(policy_net(torch.tensor(discretized_state).float().to(device))).item()  # Move tensor to GPU

                take_action(action)  # Implement your action-taking logic here
                traci.simulationStep()

                next_state = get_state()  # Get the full next state
                next_discretized_state = discretize_state(next_state)  # Get the discretized next state
                reward = calculate_reward(next_state)  # Use full next state for reward calculation
                episode_count += 1
                delta = reward - reward_mean
                reward_mean += delta / episode_count
                reward_std += delta * (reward - reward_mean)
                normalized_reward = (reward - reward_mean) / (np.sqrt(reward_std / episode_count) + 1e-5)

                # Store transition in replay buffer
                replay_buffer.push(discretized_state, action, normalized_reward, next_discretized_state, done)

                # Sample a batch from the replay buffer
                if replay_buffer.size() >= BATCH_SIZE:
                    batch = replay_buffer.sample()
                    states, actions, rewards, next_states, dones = zip(*batch)

                    states = torch.tensor([list(s) for s in states]).float().to(device)  # Move tensors to GPU
                    actions = torch.tensor(actions).long().to(device)  # Move tensors to GPU
                    rewards = torch.tensor(rewards).float().to(device)  # Move tensors to GPU
                    next_states = torch.tensor([list(s) for s in next_states]).float().to(device)  # Move tensors to GPU

                    # Compute Q-values for current states
                    q_values = policy_net(states)
                    q_value = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)

                    # Compute Q-values for next states using target network
                    next_q_values = target_net(next_states)
                    next_q_value = next_q_values.max(1)[0]

                    # Compute target Q-values
                    target_q_value = rewards + DISCOUNT_FACTOR * next_q_value

                    # Compute loss
                    loss = nn.MSELoss()(q_value, target_q_value)
                    losses.append(loss)

                    # Optimize the model
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                discretized_state = next_discretized_state
                total_reward += reward

            # Decay epsilon
            if EPSILON > EPSILON_MIN:
                EPSILON *= EPSILON_DECAY

            # Update target network
            if episode % TARGET_UPDATE_FREQ == 0:
                target_net.load_state_dict(policy_net.state_dict())

            metrics = calculate_metrics()  # Replace with the actual metric function
            metrics['episode'] = episode
            metrics['total_reward'] = total_reward
            metrics_history.append(metrics)

            print(f"Episode {episode} completed. Total reward: {total_reward}")
            reward_window.append(total_reward)
            if len(reward_window) == CONVERGENCE_WINDOW:
                avg_reward = sum(reward_window) / CONVERGENCE_WINDOW
                if episode > CONVERGENCE_WINDOW and abs(avg_reward - prev_avg_reward) < CONVERGENCE_THRESHOLD:
                    print(f"Converged at episode {episode}")
                    break
                prev_avg_reward = avg_reward

        except traci.exceptions.FatalTraCIError as e:
            print(f"TraCI error in episode {episode}: {e}")
        finally:
            traci.close()

    return policy_net, metrics_history


# Run Deep Q-learning
policy_net, metrics_history = deep_q_learning()

# Print final metrics
final_metrics = metrics_history[-1]
print("Final metrics after DQN: ")
print(f"Average waiting time: {final_metrics['average_waiting_time']:.2f} seconds")
print(f"Average speed: {final_metrics['average_speed']:.2f} m/s")
print(f"Total queue length: {final_metrics['total_queue_length']}")
print(f"Total reward: {final_metrics['total_reward']:.2f}")

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 52929 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 0 completed. Total reward: 2344.528198381199
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37289ms
 Real time factor: 96.5432
 UPS: 2213.387326
Vehicles: 
 Inserted: 1480 (Loaded: 1491)
 Running: 27
 Waiting: 11

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 41259 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 1 completed. Total reward: 3566.855653929953
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38183ms
 Real time factor: 94.2828
 UPS: 1974.648404
Vehicles: 
 Inserted: 1491
 Running: 17
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 48177 ***
Loading net-file from 'my_network.net.xml' ... done (4ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 2 completed. Total reward: 3044.351560291958
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37736ms
 Real time factor: 95.3996
 UPS: 2084.534662
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 18
 Waiting: 1

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 44263 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 3 completed. Total reward: 3646.5528951683646
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36139ms
 Real time factor: 99.6154
 UPS: 2066.548604
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 15
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 40197 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 4 completed. Total reward: 2895.4801412545335
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37152ms
 Real time factor: 96.8992
 UPS: 2107.611972
Vehicles: 
 Inserted: 1491
 Running: 20
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 59355 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 5 completed. Total reward: 4002.5706931659292
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38252ms
 Real time factor: 94.1127
 UPS: 1897.234132
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 19
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 58803 ***
Loading net-file from 'my_network.net.xml' ... done (4ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 6 completed. Total reward: 3919.3312311232094
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36498ms
 Real time factor: 98.6355
 UPS: 2004.301606
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 15
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 53067 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 7 completed. Total reward: 3135.0545853453605
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38091ms
 Real time factor: 94.5105
 UPS: 2018.009504
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 20
 Waiting: 1

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 46861 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 8 completed. Total reward: -2740.1665653449986
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 39459ms
 Real time factor: 91.2339
 UPS: 2663.220051
Vehicles: 
 Inserted: 1411 (Loaded: 1491)
 Running: 30
 Waiting: 80
Teleports: 3 (Jam: 1, Yield: 2)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 53867 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 9 completed. Total reward: 3293.493349960831
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38222ms
 Real time factor: 94.1866
 UPS: 2010.386688
Vehicles: 
 Inserted: 1491
 Running: 16
 Waiting: 0

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 44099 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 10 c



Episode 11 completed. Total reward: 4469.993754709114
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36846ms
 Real time factor: 97.704
 UPS: 1928.160452
Vehicles: 
 Inserted: 1491
 Running: 19
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 53479 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 12 completed. Total reward: 3784.668000812925
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37135ms
 Real time factor: 96.9436
 UPS: 1988.474485
Vehicles: 
 Inserted: 1491
 Running: 21
 Waiting: 0

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 59337 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 13 completed. Total reward: 3



Episode 14 completed. Total reward: 3830.3413633171335
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37188ms
 Real time factor: 96.8054
 UPS: 1992.309347
Vehicles: 
 Inserted: 1491
 Running: 15
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 59731 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 15 completed. Total reward: 2810.279402555023
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37819ms
 Real time factor: 95.1902
 UPS: 2137.655676
Vehicles: 
 Inserted: 1491
 Running: 15
 Waiting: 0

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 34611 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 16 completed. Total reward:



Episode 17 completed. Total reward: 2684.82845189388
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38916ms
 Real time factor: 92.5069
 UPS: 2088.421215
Vehicles: 
 Inserted: 1480 (Loaded: 1491)
 Running: 24
 Waiting: 11
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 42349 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 18 completed. Total reward: 2800.8157069054714
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37739ms
 Real time factor: 95.392
 UPS: 2113.622513
Vehicles: 
 Inserted: 1478 (Loaded: 1491)
 Running: 18
 Waiting: 13

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 60049 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episo



Episode 20 completed. Total reward: 3605.3490967737325
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37280ms
 Real time factor: 96.5665
 UPS: 2035.675966
Vehicles: 
 Inserted: 1491
 Running: 16
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 59997 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 21 completed. Total reward: 3268.9823128781677
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36843ms
 Real time factor: 97.7119
 UPS: 2107.971664
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 20
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 46523 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 22 completed. Total reward: 3450.2764593318825
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36385ms
 Real time factor: 98.9419
 UPS: 2095.671293
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 17
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 36821 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 23 completed. Total reward: 2812.1440592402478
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36872ms
 Real time factor: 97.6351
 UPS: 2181.031677
Vehicles: 
 Inserted: 1480 (Loaded: 1491)
 Running: 20
 Waiting: 11

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 48965 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Epi



Episode 25 completed. Total reward: 3521.3677232544055
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38659ms
 Real time factor: 93.1219
 UPS: 1987.842417
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 16
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 60357 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 26 completed. Total reward: 3738.1229769581887
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37436ms
 Real time factor: 96.1641
 UPS: 2017.576664
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 16
 Waiting: 1

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 55681 ***
Loading net-file from 'my_network.net.xml' ... done (6ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Epis



Episode 28 completed. Total reward: 3503.4479956191612
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37775ms
 Real time factor: 95.3011
 UPS: 1993.328921
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 18
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 44145 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 29 completed. Total reward: 3613.827370155735
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37750ms
 Real time factor: 95.3642
 UPS: 1992.291391
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 16
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 52135 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 30 completed. Total reward: 4149.117944236523
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36414ms
 Real time factor: 98.8631
 UPS: 2010.600319
Vehicles: 
 Inserted: 1491
 Running: 13
 Waiting: 0

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 50563 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 31 completed.



Episode 32 completed. Total reward: 3403.2876961604843
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36810ms
 Real time factor: 97.7995
 UPS: 2059.847867
Vehicles: 
 Inserted: 1478 (Loaded: 1491)
 Running: 21
 Waiting: 13
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 42191 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 33 completed. Total reward: 3938.0397035825463
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37160ms
 Real time factor: 96.8784
 UPS: 1973.923574
Vehicles: 
 Inserted: 1491
 Running: 18
 Waiting: 0

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 48217 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 34 completed. Total reward: 4151.392426602017
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 36216ms
 Real time factor: 99.4036
 UPS: 1989.838745
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 18
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 45203 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 35 completed. Total reward: 3410.0458147792365
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37466ms
 Real time factor: 96.0871
 UPS: 2067.234292
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 18
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 47901 ***
Episode 38 completed. Total reward: 4286.503588617368
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37564ms
 Real time factor: 95.8364
 UPS: 1914.253008
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 13
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 40313 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 39 completed. Total reward: 3559.518956996281
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38003ms
 Real time factor: 94.7294
 UPS: 1986.132674
Vehicles: 
 Inserted: 1491
 Running: 16
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 43493 ***
Loading net-file from 'my_network.net.xml' ... done (8ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 40 completed. Total reward: 3733.378639666793
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37752ms
 Real time factor: 95.3592
 UPS: 1994.887688
Vehicles: 
 Inserted: 1487 (Loaded: 1491)
 Running: 17
 Waiting: 4
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 38867 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 41 completed. Total reward: 3004.116686408548
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37340ms
 Real time factor: 96.4114
 UPS: 2119.389395
Vehicles: 
 Inserted: 1491
 Running: 19
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 53519 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 42 completed. Total reward: 4096.473194253591
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37112ms
 Real time factor: 97.0037
 UPS: 1976.826902
Vehicles: 
 Inserted: 1491
 Running: 14
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 46319 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00
Episode 43 completed. Total reward: 4850.6528652237685
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37315ms
 Real time factor: 96.4759
 UPS: 1848.773951
Vehicles: 
 Inserted: 1491
 Running: 15
 Waiting: 0

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 46377 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 44 completed. Total reward: 3844.706365294271
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38783ms
 Real time factor: 92.8242
 UPS: 1913.183611
Vehicles: 
 Inserted: 1491
 Running: 20
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 42805 ***
Loading net-file from 'my_network.net.xml' ... done (7ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 45 completed. Total reward: 3833.1069882528504
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 40273ms
 Real time factor: 89.3899
 UPS: 1849.328334
Vehicles: 
 Inserted: 1491
 Running: 19
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 55251 ***
Loading net-file from 'my_network.net.xml' ... done (6ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 46 completed. Total reward: 3482.8235250019284
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 39435ms
 Real time factor: 91.2895
 UPS: 1965.614302
Vehicles: 
 Inserted: 1491
 Running: 19
 Waiting: 0
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 36889 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 47 completed. Total reward: 2093.8633039544143
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38889ms
 Real time factor: 92.5712
 UPS: 2110.313971
Vehicles: 
 Inserted: 1439 (Loaded: 1491)
 Running: 44
 Waiting: 52
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 41991 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 48 completed. Total reward: 4036.0229112042734
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 38997ms
 Real time factor: 92.3148
 UPS: 1897.915224
Vehicles: 
 Inserted: 1488 (Loaded: 1491)
 Running: 19
 Waiting: 3
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 45569 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




Episode 49 completed. Total reward: 3275.7805290616966
Simulation ended at time: 3600.00
Reason: TraCI requested termination.
Performance: 
 Duration: 37785ms
 Real time factor: 95.2759
 UPS: 2048.775969
Vehicles: 
 Inserted: 1490 (Loaded: 1491)
 Running: 26
 Waiting: 1
Teleports: 1 (Yield: 1)

 Retrying in 1 seconds
Loading configuration ... done.
***Starting server on port 51467 ***
Loading net-file from 'my_network.net.xml' ... done (5ms).
Loading done.
Simulation version 1.4.0 started with time: 0.00




In [None]:
import matplotlib.pyplot as plt

# Filter data for every 20th episode
filtered_metrics = [m for m in metrics_history if m['episode'] % 1 == 0]

plt.figure(figsize=(10, 6))
plt.plot(
    [m['episode'] for m in filtered_metrics], 
    [m['average_waiting_time'] for m in filtered_metrics], 
    label='Average Waiting Time'
)
plt.xlabel('Episode')
plt.ylabel('Average Waiting Time')
plt.title('Episode vs. Average Waiting Time (Every 20th Episode)')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import matplotlib.pyplot as plt

def plot_metrics(metrics_history):
    episodes = [m['episode'] for m in metrics_history]
    rewards = [m['total_reward'] for m in metrics_history]
    avg_waiting_times = [m['average_waiting_time'] for m in metrics_history]
    avg_speeds = [m['average_speed'] for m in metrics_history]
    queue_lengths = [m['total_queue_length'] for m in metrics_history]

    fig, axs = plt.subplots(2, 2, figsize=(15, 15))
    fig.suptitle('DQN Training Metrics')

    axs[0, 0].plot(episodes, rewards)
    axs[0, 0].set_title('Total Reward vs Episode')
    axs[0, 0].set_xlabel('Episode')
    axs[0, 0].set_ylabel('Total Reward')

    axs[0, 1].plot(episodes, avg_waiting_times)
    axs[0, 1].set_title('Average Waiting Time vs Episode')
    axs[0, 1].set_xlabel('Episode')
    axs[0, 1].set_ylabel('Average Waiting Time (s)')

    axs[1, 0].plot(episodes, avg_speeds)
    axs[1, 0].set_title('Average Speed vs Episode')
    axs[1, 0].set_xlabel('Episode')
    axs[1, 0].set_ylabel('Average Speed (m/s)')

    axs[1, 1].plot(episodes, queue_lengths)
    axs[1, 1].set_title('Total Queue Length vs Episode')
    axs[1, 1].set_xlabel('Episode')
    axs[1, 1].set_ylabel('Total Queue Length')

    plt.tight_layout()
    plt.show()

# Call this function after training
plot_metrics(metrics_history)
plot_metrics(filtered_metrics)


Reduced States

In [None]:
import random
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from collections import deque
import traci

# Hyperparameters
EPSILON = 1.0
EPSILON_MIN = 0.01
EPSILON_DECAY = 0.995
DISCOUNT_FACTOR = 0.99
LEARNING_RATE = 0.001
NUM_EPISODES = 1000
BATCH_SIZE = 64
TARGET_UPDATE_FREQ = 10
MAX_MEMORY_SIZE = 10000

class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, output_dim)
        
    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)


class ReplayBuffer:
    def __init__(self):
        self.memory = deque(maxlen=MAX_MEMORY_SIZE)
    
    def push(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))
    
    def sample(self):
        return random.sample(self.memory, BATCH_SIZE)
    
    def size(self):
        return len(self.memory)

def discretize_state(state):
    def categorize(value, ranges):
        for i, r in enumerate(ranges):
            if value <= r:
                return i
        return len(ranges)

    discretized = [
        state['traffic_light_phases']['7'],
        state['traffic_light_phases']['8'],
        categorize(state['vehicle_count'], [10, 30, 40]),
        categorize(state['average_speed'], [0.5, 1, 2]),
        categorize(state['total_queue_length'], [10, 30, 50])
    ]
    return discretized

def calculate_reward(state):
    avg_speed = state['average_speed']
    total_queue = state['total_queue_length']
    waiting_vehicles = state['waiting_vehicles']
    return avg_speed - 0.1 * total_queue - 0.05 * waiting_vehicles

def get_state():
    return {
        'traffic_light_phases': {
            '7': traci.trafficlight.getPhase('7'),
            '8': traci.trafficlight.getPhase('8')
        },
        'vehicle_count': traci.vehicle.getIDCount(),
        'average_speed': traci.vehicle.getIDCount() > 0 and sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / traci.vehicle.getIDCount() or 0,
        'total_queue_length': sum(traci.edge.getLastStepHaltingNumber(edge) for edge in traci.edge.getIDList()),
        'waiting_vehicles': sum(1 for v in traci.vehicle.getIDList() if traci.vehicle.getWaitingTime(v) > 0)
    }

# Select device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


def deep_q_learning(num_episodes=NUM_EPISODES, steps_per_episode=3600):
    input_dim = 5  # Updated input dimension
    output_dim = 4  # Number of possible actions (0-3)
    
    policy_net = DQN(input_dim, output_dim).float().to(device)
    target_net = DQN(input_dim, output_dim).float().to(device)
    target_net.load_state_dict(policy_net.state_dict())
    optimizer = optim.Adam(policy_net.parameters(), lr=LEARNING_RATE)
    
    replay_buffer = ReplayBuffer()
    metrics_history = []
    global EPSILON
    
    for episode in range(num_episodes):
        try:
            traci.start(['sumo', '-c', 'my_simulation.sumocfg'])
            state = get_state()
            discretized_state = discretize_state(state)
            total_reward = 0
            done = False

            for step in range(steps_per_episode):
                if random.random() < EPSILON:
                    action = random.choice([0, 1, 2, 3])
                else:
                    with torch.no_grad():
                        state_tensor = torch.tensor(discretized_state, dtype=torch.float32, device=device)
                        action = torch.argmax(policy_net(state_tensor)).item()

                take_action(action)
                traci.simulationStep()

                next_state = get_state()
                next_discretized_state = discretize_state(next_state)
                reward = calculate_reward(next_state)

                replay_buffer.push(discretized_state, action, reward, next_discretized_state, done)

                if replay_buffer.size() >= BATCH_SIZE:
                    batch = replay_buffer.sample()
                    states, actions, rewards, next_states, dones = zip(*batch)

                    states = torch.tensor([list(s) for s in states], dtype=torch.float32, device=device)
                    actions = torch.tensor(actions, dtype=torch.long, device=device)
                    rewards = torch.tensor(rewards, dtype=torch.float32, device=device)
                    next_states = torch.tensor([list(s) for s in next_states], dtype=torch.float32, device=device)

                    q_values = policy_net(states)
                    q_value = q_values.gather(1, actions.unsqueeze(1)).squeeze(1)

                    next_q_values = target_net(next_states)
                    next_q_value = next_q_values.max(1)[0]

                    target_q_value = rewards + DISCOUNT_FACTOR * next_q_value

                    loss = nn.MSELoss()(q_value, target_q_value)

                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                discretized_state = next_discretized_state
                total_reward += reward

            if EPSILON > EPSILON_MIN:
                EPSILON *= EPSILON_DECAY

            if episode % TARGET_UPDATE_FREQ == 0:
                target_net.load_state_dict(policy_net.state_dict())

            metrics = calculate_metrics()
            metrics['episode'] = episode
            metrics['total_reward'] = total_reward
            metrics_history.append(metrics)

            print(f"Episode {episode} completed. Total reward: {total_reward}")

        except traci.exceptions.FatalTraCIError as e:
            print(f"TraCI error in episode {episode}: {e}")
        finally:
            traci.close()

    return policy_net, metrics_history


# Implement these functions according to your SUMO setup

def calculate_metrics():
    return {
        'average_waiting_time': sum(traci.vehicle.getAccumulatedWaitingTime(v) for v in traci.vehicle.getIDList()) / max(len(traci.vehicle.getIDList()), 1),
        'average_speed': sum(traci.vehicle.getSpeed(v) for v in traci.vehicle.getIDList()) / max(len(traci.vehicle.getIDList()), 1),
        'total_queue_length': sum(traci.edge.getLastStepHaltingNumber(edge) for edge in traci.edge.getIDList())
    }

def take_action(action):
    if action == 0:
        traci.trafficlight.setPhase('7', (traci.trafficlight.getPhase('7') + 1) % 4)
    elif action == 1:
        traci.trafficlight.setPhase('8', (traci.trafficlight.getPhase('8') + 1) % 4)
    elif action == 2:
        traci.trafficlight.setPhase('7', (traci.trafficlight.getPhase('7') + 1) % 4)
        traci.trafficlight.setPhase('8', (traci.trafficlight.getPhase('8') + 1) % 4)
    # Action 3 does nothing (keeps current phases)

# Run Deep Q-learning
# policy_net, metrics_history = deep_q_learning()

# # Print final metrics
# final_metrics = metrics_history[-1]
# print("Final metrics after DQN: ")
# print(f"Average waiting time: {final_metrics['average_waiting_time']:.2f} seconds")
# print(f"Average speed: {final_metrics['average_speed']:.2f} m/s")
# print(f"Total queue length: {final_metrics['total_queue_length']}")
# print(f"Total reward: {final_metrics['total_reward']:.2f}")

In [None]:
# traci.close()