## simple reward

In [None]:
def _green_phase_durations(tls_id):
    """Return the configured durations of the green phases of a traffic light.

    Only the program that is currently active on ``tls_id`` is inspected. A
    phase counts as green when its signal-state string contains ``G`` or ``g``.
    An empty list is returned when the active program cannot be found.
    """
    active_program = traci.trafficlight.getProgram(tls_id)
    for logic in traci.trafficlight.getAllProgramLogics(tls_id):
        if logic.programID == active_program:
            return [
                phase.duration
                for phase in logic.phases
                if "G" in phase.state or "g" in phase.state
            ]
    return []


def calculate_reward(self):
    """Compute the dense, per-step reward for the two-road intersection.

    The reward is a weighted sum of six components read from TraCI:

    * waiting time on ``road1`` and ``road2`` (penalty),
    * number of halted vehicles on both roads (penalty),
    * number of moving vehicles on both roads (reward, throughput proxy),
    * number of vehicles whose type ID is ``"emergency"`` (reward),
    * whether the action changed since the last step (penalty),
    * difference between the first two green-phase durations of the
      ``"intersection"`` traffic light (penalty).

    Reads ``self.last_action`` and ``self.current_action``.

    Returns:
        The combined reward as a number.
    """
    roads = ("road1", "road2")

    # 1. Total Waiting Time (Penalty)
    total_waiting_time = sum(traci.edge.getWaitingTime(road) for road in roads)

    # 2. Total Queue Length (Penalty)
    # Count halted vehicles only. The original used the plain vehicle count
    # for both the queue and the throughput term, so the two components were
    # always the same number with opposite-signed weights.
    vehicles_on_roads = sum(
        traci.edge.getLastStepVehicleNumber(road) for road in roads
    )
    total_queue_length = sum(
        traci.edge.getLastStepHaltingNumber(road) for road in roads
    )

    # 3. Throughput (Reward)
    # Vehicles on the roads that are not halted, used as a proxy for flow.
    throughput = vehicles_on_roads - total_queue_length

    # 4. Emergency Vehicle Priority (Reward)
    emergency_vehicles = sum(
        1
        for vehicle_id in traci.vehicle.getIDList()
        if traci.vehicle.getTypeID(vehicle_id) == "emergency"
    )

    # 5. Phase Switching Penalty (Penalty)
    phase_switching_penalty = 1 if self.last_action != self.current_action else 0

    # 6. Fairness Penalty (Penalty)
    # getPhaseDuration() returns a single float (duration of the current
    # phase), so it cannot be indexed per road. Use the green-phase durations
    # of the active program instead.
    # NOTE(review): assumes the first two green phases serve road1 and road2
    # respectively - confirm against the network's signal program.
    green_times = _green_phase_durations("intersection")
    if len(green_times) >= 2:
        fairness_penalty = abs(green_times[0] - green_times[1])
    else:
        fairness_penalty = 0

    # Combine components with weights
    weights = {
        "waiting_time": -0.1,  # Penalize waiting time
        "queue_length": -0.05,  # Penalize queue length
        "throughput": 0.2,  # Reward throughput
        "emergency_vehicles": 1.0,  # Reward emergency vehicles
        "phase_switching": -0.2,  # Penalize frequent phase switching
        "fairness": -0.1,  # Penalize unfair green time distribution
    }

    reward = (
        weights["waiting_time"] * total_waiting_time
        + weights["queue_length"] * total_queue_length
        + weights["throughput"] * throughput
        + weights["emergency_vehicles"] * emergency_vehicles
        + weights["phase_switching"] * phase_switching_penalty
        + weights["fairness"] * fairness_penalty
    )

    return reward

In [None]:
# Debug output for the reward and three of its components. The names used
# here are assigned as local variables inside calculate_reward.
# NOTE(review): presumably this line is meant to sit inside calculate_reward
# just before `return reward`; as a standalone cell it only works if these
# names are also defined at notebook scope - confirm.
print(f"Reward: {reward}, Waiting Time: {total_waiting_time}, Queue Length: {total_queue_length}, Throughput: {throughput}")

## sparse reward

In [None]:
import traci
import numpy as np
from dqn_agent import DQNAgent  # Your DQN agent implementation

# Define the SUMO environment
class SUMOEnv:
    """Thin wrapper around TraCI for a network of four signalised intersections.

    For every intersection ID ``X`` the environment reads the edges ``X_ns``,
    ``X_ew`` and ``X_out`` and controls the traffic light named ``X``.
    """

    def __init__(self, sumo_config, sumo_binary="sumo-gui"):
        """Store the launch command and the fixed problem dimensions.

        Args:
            sumo_config: Path to the ``.sumocfg`` file passed to SUMO via ``-c``.
            sumo_binary: SUMO executable to launch. Defaults to ``"sumo-gui"``
                (the previous hard-coded value); pass ``"sumo"`` to run
                without the GUI.
        """
        self.sumo_cmd = [sumo_binary, "-c", sumo_config]
        self.intersections = ["intersection1", "intersection2", "intersection3", "intersection4"]
        self.state_dim = len(self.intersections) * 4  # 4 features per intersection
        self.action_dim = len(self.intersections)  # 1 action per intersection

    def get_state(self):
        """Return the current observation as a flat numpy array.

        For each intersection, in order, the array holds four values: vehicle
        count on the ``_ns`` edge, vehicle count on the ``_ew`` edge, waiting
        time on the ``_ns`` edge and waiting time on the ``_ew`` edge. The
        length therefore equals ``self.state_dim``.
        """
        state = []
        for intersection in self.intersections:
            ns_edge = f"{intersection}_ns"
            ew_edge = f"{intersection}_ew"
            # Only the four features counted in state_dim are queried; the
            # current phase and outgoing vehicle count were previously fetched
            # here but never used.
            state.extend([
                traci.edge.getLastStepVehicleNumber(ns_edge),
                traci.edge.getLastStepVehicleNumber(ew_edge),
                traci.edge.getWaitingTime(ns_edge),
                traci.edge.getWaitingTime(ew_edge),
            ])
        return np.array(state)

    def perform_action(self, actions):
        """Set the phase of each traffic light.

        Args:
            actions: Iterable of phase indices, paired positionally with
                ``self.intersections``.
        """
        for intersection, action in zip(self.intersections, actions):
            traci.trafficlight.setPhase(intersection, action)

    def calculate_episode_reward(self):
        """Return the sparse reward from the simulation state at call time.

        The reward is ``-(waiting time + vehicle count)`` summed over the
        ``_ns`` and ``_ew`` edges of every intersection, plus the vehicle
        count on every ``_out`` edge.
        """
        total_waiting_time = 0
        total_queue_length = 0
        total_throughput = 0
        for intersection in self.intersections:
            ns_edge = f"{intersection}_ns"
            ew_edge = f"{intersection}_ew"
            total_waiting_time += (
                traci.edge.getWaitingTime(ns_edge)
                + traci.edge.getWaitingTime(ew_edge)
            )
            total_queue_length += (
                traci.edge.getLastStepVehicleNumber(ns_edge)
                + traci.edge.getLastStepVehicleNumber(ew_edge)
            )
            total_throughput += traci.edge.getLastStepVehicleNumber(f"{intersection}_out")
        reward = -(total_waiting_time + total_queue_length) + total_throughput
        return reward

# Training function
def train_agent(agent, env, episodes=1000, episode_length=100, batch_size=32):
    """Train ``agent`` on ``env`` with one sparse reward per episode.

    Each episode starts a fresh SUMO instance, runs ``episode_length``
    simulation steps, computes a single reward at the end and stores every
    transition of the episode with that reward. After the episode, one replay
    update is run once the agent's memory holds more than ``batch_size``
    entries. ``agent.plot_results()`` is called after the last episode.

    Args:
        agent: Object providing ``act``, ``remember``, ``replay``,
            ``plot_results``, ``memory`` and ``epsilon``.
        env: Object providing ``sumo_cmd``, ``get_state``, ``perform_action``
            and ``calculate_episode_reward``.
        episodes: Number of episodes to run.
        episode_length: Number of simulation steps per episode.
        batch_size: Batch size passed to ``agent.replay``.
    """
    for episode in range(episodes):
        traci.start(env.sumo_cmd)
        try:
            states, actions, next_states = [], [], []

            state = env.get_state()
            for _ in range(episode_length):
                action = agent.act(state)
                env.perform_action(action)

                # Advance the simulation BEFORE observing the next state;
                # observing first would make next_state identical to state.
                traci.simulationStep()
                next_state = env.get_state()

                states.append(state)
                actions.append(action)
                next_states.append(next_state)

                # The post-step observation is the next step's input.
                state = next_state

            reward = env.calculate_episode_reward()
        finally:
            # Always release the TraCI connection, otherwise a failure in
            # mid-episode leaves it open for the next traci.start().
            traci.close()

        # NOTE(review): every transition is stored with the episode reward and
        # done=True - confirm this is intended rather than marking only the
        # last transition as terminal.
        for state, action, next_state in zip(states, actions, next_states):
            agent.remember(state, action, reward, next_state, done=True)

        print(f"Episode: {episode + 1}, Total Reward: {reward}, Epsilon: {agent.epsilon}")

        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

    agent.plot_results()

# Main function
if __name__ == "__main__":
    # Script entry point: build the environment from a SUMO config, size the
    # DQN agent from the environment's dimensions, then run training with the
    # default hyper-parameters.
    sumo_config = "your_config.sumocfg"  # Path to your SUMO config file
    env = SUMOEnv(sumo_config)
    agent = DQNAgent(
        action_dim=env.action_dim,
        state_dim=env.state_dim,
    )
    train_agent(agent=agent, env=env)