In [None]:
import traci


class Environment:
    """TraCI wrapper around one SUMO traffic light ("J1") with four phases.

    The class offers a discrete table of green-time splits, applies a chosen
    split for one full signal cycle, and summarises what happened during that
    cycle as a discrete state plus accumulated per-vehicle waiting time.
    """

    # ID of the traffic light this environment controls.
    JUNCTION_ID = "J1"
    # Fixed yellow time appended after every green phase.
    YELLOW_DURATION = 4
    # Edge observed for phases 0..3, in phase order.
    # NOTE(review): presumably the incoming approach served by the matching
    # green phase -- confirm against the network file.
    PHASE_EDGES = ("E01", "E21", "E41", "E31")
    # Number of simulation steps in the fixed-program baseline run.
    BASELINE_STEPS = 8400

    def __init__(self, config, sumo_binary):
        """Store the SUMO launch settings and build the action table.

        Parameters:
          config (str): Path of the SUMO configuration passed via ``-c``.
          sumo_binary (str): SUMO executable to launch.
        """
        self.sumo_binary = sumo_binary
        self.sumo_config = config
        self.action_space = self.define_action_space()

    def define_action_space(self):
        """Return the mapping ``action index -> 4 green durations``.

        Every tuple is a split of 64 units of green time over the four phases
        in multiples of 8 (minimum 8), giving 35 actions.
        """
        return {
            0: (24, 8, 8, 24),   1: (24, 8, 24, 8),    2: (32, 8, 16, 8),    3: (40, 8, 8, 8),     4: (24, 8, 16, 16),
            5: (24, 24, 8, 8),   6: (8, 32, 16, 8),    7: (16, 32, 8, 8),    8: (32, 8, 8, 16),    9: (8, 32, 8, 16),
            10: (8, 24, 24, 8),  11: (32, 16, 8, 8),   12: (8, 16, 24, 16),  13: (24, 16, 8, 16),  14: (8, 24, 16, 16),
            15: (8, 16, 8, 32),  16: (8, 16, 16, 24),  17: (8, 16, 32, 8),   18: (24, 16, 16, 8),  19: (8, 24, 8, 24),
            20: (16, 8, 24, 16), 21: (8, 40, 8, 8),    22: (8, 8, 40, 8),    23: (16, 8, 16, 24),  24: (8, 8, 32, 16),
            25: (8, 8, 16, 32),  26: (16, 8, 8, 32),   27: (8, 8, 8, 40),    28: (8, 8, 24, 24),   29: (16, 8, 32, 8),
            30: (16, 24, 16, 8), 31: (16, 16, 24, 8),  32: (16, 16, 16, 16), 33: (16, 24, 8, 16),  34: (16, 16, 8, 24)
        }

    def start_simulation(self):
        """Launch SUMO with the stored configuration and connect TraCI."""
        traci.start([self.sumo_binary, "-c", self.sumo_config])

    def close_simulation(self):
        """Close the current TraCI connection."""
        traci.close()

    def compute_waiting_time(self):
        """Return ``{lane_id: waiting time}`` for the lanes controlled by J1."""
        lanes = traci.trafficlight.getControlledLanes(self.JUNCTION_ID)
        return {lane: traci.lane.getWaitingTime(lane) for lane in lanes}

    def compute_queue_length(self):
        """Return ``{lane_id: halting vehicle count}`` for the lanes controlled by J1.

        The count is what TraCI reports as halting during the last step.
        """
        lanes = traci.trafficlight.getControlledLanes(self.JUNCTION_ID)
        return {lane: traci.lane.getLastStepHaltingNumber(lane) for lane in lanes}

    def update_traffic_light_program(self, junction_id, green_durations):
        """Install a fixed four-phase program with the given green times.

        Each green phase is followed by a yellow phase of ``YELLOW_DURATION``.

        Parameters:
          junction_id (str): Traffic light ID to reprogram.
          green_durations (sequence of int): Exactly four positive integers.

        Returns:
          int: Total cycle length (all greens plus all yellows).

        Raises:
          ValueError: If there are not exactly four durations, or any of them
            is not a positive integer.
        """
        if len(green_durations) != 4:
            raise ValueError("Must provide exactly 4 duration values")
        if not all(isinstance(x, int) and x > 0 for x in green_durations):
            raise ValueError("All durations must be positive integers")
        yellow_duration = self.YELLOW_DURATION
        # Green/yellow state pairs in cycle order; one character per link.
        states = [
            "GGgrrrrrrrrr", "yyyrrrrrrrrr",
            "rrrGGgrrrrrr", "rrryyyrrrrrr",
            "rrrrrrGGgrrr", "rrrrrryyyrrr",
            "rrrrrrrrrGGg", "rrrrrrrrryyy"
        ]
        phases = []
        for i, green_duration in enumerate(green_durations):
            for duration, state in (
                (green_duration, states[i * 2]),
                (yellow_duration, states[i * 2 + 1]),
            ):
                # min/max equal to the duration keeps the phase length fixed.
                phases.append(traci.trafficlight.Phase(
                    duration=duration,
                    state=state,
                    minDur=duration,
                    maxDur=duration
                ))
        logic = traci.trafficlight.Logic(
            programID="1",
            type=0,
            currentPhaseIndex=0,
            phases=phases
        )
        traci.trafficlight.setProgramLogic(junction_id, logic)
        return sum(green_durations) + len(green_durations) * yellow_duration

    def run_program_0(self):
        """Run the network's default signal program as a baseline.

        Simulates ``BASELINE_STEPS`` steps, summing the controlled-lane waiting
        times at every step and collecting every vehicle ID seen.

        Returns:
          tuple: ``(total_waiting_time, total_vehicles, waiting_time_per_vehicle)``.
        """
        self.start_simulation()

        total_waiting_time = 0.0
        seen_vehicles = set()
        try:
            for _ in range(self.BASELINE_STEPS):
                traci.simulationStep()
                total_waiting_time += sum(self.compute_waiting_time().values())
                seen_vehicles.update(traci.vehicle.getIDList())
        finally:
            # Always release the SUMO connection, even if a step fails.
            self.close_simulation()

        total_vehicles = len(seen_vehicles)
        waiting_time_per_vehicle = total_waiting_time / total_vehicles if total_vehicles > 0 else 0
        print(f"\nDefault Program Results:")
        print(f"Total Wait Time: {total_waiting_time:.1f}")
        print(f"Vehicles: {total_vehicles}")
        print(f"Wait Time per Vehicle: {waiting_time_per_vehicle:.1f}")
        return total_waiting_time, total_vehicles, waiting_time_per_vehicle

    def _update_edge_flow(self, edge_id, duration, vehicle_waiting_times):
        """Advance the simulation and track flow on one edge plus waiting times.

        Parameters:
          edge_id (str): The ID of the edge to track.
          duration (int): Number of simulation steps to execute.
          vehicle_waiting_times (dict): ``{vehicle_id: waiting steps}``;
            updated in place for every vehicle present in the simulation.

        Returns:
          in_count (int): Number of vehicles that appeared on the edge.
          out_count (int): Number of vehicles that disappeared from the edge.
          vehicle_waiting_times (dict): The same dict that was passed in.
        """
        on_edge = set(traci.edge.getLastStepVehicleIDs(edge_id))
        in_count = 0
        out_count = 0

        for _ in range(duration):
            traci.simulationStep()

            # A vehicle "waits" in a step when it is (almost) standing still
            # without being at a scheduled stop. One step adds one unit,
            # which equals one second only for a 1 s step length.
            for vid in traci.vehicle.getIDList():
                vehicle_waiting_times.setdefault(vid, 0)
                if traci.vehicle.getSpeed(vid) < 0.1 and not traci.vehicle.isStopped(vid):
                    vehicle_waiting_times[vid] += 1

            now_on_edge = set(traci.edge.getLastStepVehicleIDs(edge_id))
            in_count += len(now_on_edge - on_edge)
            out_count += len(on_edge - now_on_edge)
            on_edge = now_on_edge

        return in_count, out_count, vehicle_waiting_times

    @staticmethod
    def _waiting_time_delta(previous, current):
        """Return the waiting time added between two ``{vehicle_id: time}`` snapshots."""
        return sum(waited - previous.get(vid, 0) for vid, waited in current.items())

    @staticmethod
    def _flow_ratios(out_counts, in_counts, epsilon=1e-6):
        """Return out/in per phase; ``epsilon`` avoids division by zero."""
        return [out / (inp + epsilon) for out, inp in zip(out_counts, in_counts)]

    def apply_action_and_get_state(self, action, vehicle_waiting_times=None):
        """Apply one action for a full signal cycle and observe the result.

        The cycle is simulated phase by phase. While phase ``k`` runs, the
        edge ``PHASE_EDGES[k]`` is tracked step by step (in and out); for the
        other edges only the vehicles that newly appeared over the phase are
        counted as inflow.

        Parameters:
          action (int): The action index to apply.
          vehicle_waiting_times (dict, optional): Accumulated
            ``{vehicle_id: waiting steps}``. Updated in place; a fresh dict is
            used when omitted.

        Returns:
          state (int): The discretized state based on flow ratios.
          cycle_length (int): Number of simulation steps that were executed.
          cycle_waiting_time (int): Waiting time added during this cycle.
          vehicle_waiting_times (dict): The updated accumulated waiting times.
        """
        if vehicle_waiting_times is None:
            vehicle_waiting_times = {}

        green_durations = self.action_space[action]
        cycle_length = self.update_traffic_light_program(self.JUNCTION_ID, green_durations)

        edge_set = list(self.PHASE_EDGES)
        phase_durations = [green + self.YELLOW_DURATION for green in green_durations]

        in_counts = [0] * len(edge_set)
        out_counts = [0] * len(edge_set)

        # Snapshot first: _update_edge_flow mutates the dict in place, so the
        # per-cycle delta needs an independent copy of the starting values.
        previous_waiting_times = dict(vehicle_waiting_times)

        for active, duration in enumerate(phase_durations):
            others = [i for i in range(len(edge_set)) if i != active]

            before_vehs = {
                i: set(traci.edge.getLastStepVehicleIDs(edge_set[i])) for i in others
            }

            in_count, out_count, vehicle_waiting_times = self._update_edge_flow(
                edge_set[active], duration, vehicle_waiting_times
            )
            in_counts[active] += in_count
            out_counts[active] += out_count

            for i in others:
                after = set(traci.edge.getLastStepVehicleIDs(edge_set[i]))
                in_counts[i] += len(after - before_vehs[i])

        cycle_waiting_time = self._waiting_time_delta(
            previous_waiting_times, vehicle_waiting_times
        )

        state = self._discretize_flow_ratios(self._flow_ratios(out_counts, in_counts))

        return state, cycle_length, cycle_waiting_time, vehicle_waiting_times

    def _discretize_flow_ratios(self, flow_ratios, epsilon=1e-6):
        """Convert flow ratios into a single base-3 encoded state.

        Parameters:
          flow_ratios (list): One out/in ratio per phase.
          epsilon (float): Unused; kept for interface compatibility.

        Returns:
          state (int): ``sum(bin_i * 3**i)`` where bin is 2 for a ratio
            >= 1.1, 1 for a ratio >= 0.7, and 0 otherwise.
        """
        base = 3  # Three bins: 0, 1, 2
        state = 0
        for i, ratio in enumerate(flow_ratios):
            if ratio >= 1.1:
                bin_value = 2  # Efficient clearing
            elif ratio >= 0.7:
                bin_value = 1  # Moderate clearing
            else:
                bin_value = 0  # Inefficient clearing
            state += bin_value * (base ** i)
        return state

    def discretized_state(self, action, epsilon=1e-6):
        """Legacy helper: apply ``action`` for one cycle and return only the state.

        ``epsilon`` is unused and kept for compatibility with existing callers.
        """
        state, *_ = self.apply_action_and_get_state(action, {})
        return state

    def get_num_actions(self):
        """Return the number of available actions."""
        return len(self.action_space)

In [2]:
import traci

# Start the SUMO simulation
traci.start(["sumo", "-c", r"C:\Users\Administrator\Desktop\MySumo\Stage 10\Stage 10\SUMO files\stage10.sumocfg"])

try:
    # Get all edge IDs and the links controlled by traffic light J1
    edges = traci.edge.getIDList()
    link_junctions = traci.trafficlight.getControlledLinks("J1")

    # Print every edge with its lane count and its start/end junction
    for i in edges:
        print(i, ": number of lanes = ",traci.edge.getLaneNumber(i)," : From ",traci.edge.getFromJunction(i)," to ",traci.edge.getToJunction(i))

    print(traci.edge.getIDCount())
    for j in link_junctions:
        print(j)
    print(traci.trafficlight.getRedYellowGreenState("J1"))
finally:
    # Always close the connection so a failed query does not leave SUMO running
    traci.close()


:J1_0 : number of lanes =  1  : From  J1  to  J1
:J1_1 : number of lanes =  1  : From  J1  to  J1
:J1_10 : number of lanes =  1  : From  J1  to  J1
:J1_11 : number of lanes =  1  : From  J1  to  J1
:J1_12 : number of lanes =  1  : From  J1  to  J1
:J1_13 : number of lanes =  1  : From  J1  to  J1
:J1_14 : number of lanes =  1  : From  J1  to  J1
:J1_15 : number of lanes =  1  : From  J1  to  J1
:J1_2 : number of lanes =  1  : From  J1  to  J1
:J1_3 : number of lanes =  1  : From  J1  to  J1
:J1_4 : number of lanes =  1  : From  J1  to  J1
:J1_5 : number of lanes =  1  : From  J1  to  J1
:J1_6 : number of lanes =  1  : From  J1  to  J1
:J1_7 : number of lanes =  1  : From  J1  to  J1
:J1_8 : number of lanes =  1  : From  J1  to  J1
:J1_9 : number of lanes =  1  : From  J1  to  J1
E01 : number of lanes =  1  : From  J0  to  J1
E10 : number of lanes =  1  : From  J1  to  J0
E12 : number of lanes =  1  : From  J1  to  J2
E13 : number of lanes =  1  : From  J1  to  J3
E14 : number of lanes 

In [3]:

# Scratch check of the "dict of sets" difference used to detect vehicles that
# appeared on / disappeared from an edge between two snapshots.
t1 = {1, 2, 3, 4}
t2 = {3,4,5,6}
t3 = {4,5,6,7,9}
t4 = {1, 3, 7, 9}

new = {}
# "before" and "after" snapshots, keyed 1..2
dict1 = {1: set(t1), 2: set(t2)}
dict2 = {1: set(t3), 2: set(t4)}

# new1: present before but not after; new2: present after but not before
new1 = {}
new2 = {}
for i in dict1:
    new1[i] = dict1[i] - dict2[i]
    new2[i] = dict2[i] - dict1[i]

print(len(dict1[1]), len(dict1), dict1, len(dict2[1]), new1, new2, sep="\n")



4
2
{1: {1, 2, 3, 4}, 2: {3, 4, 5, 6}}
5
{1: {1, 2, 3}, 2: {4, 5, 6}}
{1: {9, 5, 6, 7}, 2: {1, 9, 7}}


In [None]:

"""Initial code"""

import traci


class Environment:
    """TraCI wrapper around one SUMO traffic light ("J1") with four phases.

    Lane-based variant: the state is derived from vehicles entering and
    leaving the lanes controlled by each phase while that phase runs.
    """

    # ID of the traffic light this environment controls.
    JUNCTION_ID = "J1"
    # Fixed yellow time appended after every green phase.
    YELLOW_DURATION = 4
    # Number of simulation steps in the fixed-program baseline run.
    BASELINE_STEPS = 8400

    def __init__(self, config, sumo_binary):
        """Store the SUMO launch settings and build the action table.

        Parameters:
          config (str): Path of the SUMO configuration passed via ``-c``.
          sumo_binary (str): SUMO executable to launch.
        """
        self.sumo_binary = sumo_binary
        self.sumo_config = config
        self.action_space = self.define_action_space()

    def define_action_space(self):
        """Return the mapping ``action index -> 4 green durations``.

        Every tuple is a split of 64 units of green time over the four phases
        in multiples of 8 (minimum 8), giving 35 actions.
        """
        return {
            0: (24, 8, 8, 24),   1: (24, 8, 24, 8),    2: (32, 8, 16, 8),    3: (40, 8, 8, 8),     4: (24, 8, 16, 16),
            5: (24, 24, 8, 8),   6: (8, 32, 16, 8),    7: (16, 32, 8, 8),    8: (32, 8, 8, 16),    9: (8, 32, 8, 16),
            10: (8, 24, 24, 8),  11: (32, 16, 8, 8),   12: (8, 16, 24, 16),  13: (24, 16, 8, 16),  14: (8, 24, 16, 16),
            15: (8, 16, 8, 32),  16: (8, 16, 16, 24),  17: (8, 16, 32, 8),   18: (24, 16, 16, 8),  19: (8, 24, 8, 24),
            20: (16, 8, 24, 16), 21: (8, 40, 8, 8),    22: (8, 8, 40, 8),    23: (16, 8, 16, 24),  24: (8, 8, 32, 16),
            25: (8, 8, 16, 32),  26: (16, 8, 8, 32),   27: (8, 8, 8, 40),    28: (8, 8, 24, 24),   29: (16, 8, 32, 8),
            30: (16, 24, 16, 8), 31: (16, 16, 24, 8),  32: (16, 16, 16, 16), 33: (16, 24, 8, 16),  34: (16, 16, 8, 24)
        }

    def start_simulation(self):
        """Launch SUMO with the stored configuration and connect TraCI."""
        traci.start([self.sumo_binary, "-c", self.sumo_config])

    def close_simulation(self):
        """Close the current TraCI connection."""
        traci.close()

    def compute_waiting_time(self):
        """Return ``{lane_id: waiting time}`` for the lanes controlled by J1."""
        lanes = traci.trafficlight.getControlledLanes(self.JUNCTION_ID)
        return {lane: traci.lane.getWaitingTime(lane) for lane in lanes}

    def compute_queue_length(self):
        """Return ``{lane_id: halting vehicle count}`` for the lanes controlled by J1.

        The count is what TraCI reports as halting during the last step.
        """
        lanes = traci.trafficlight.getControlledLanes(self.JUNCTION_ID)
        return {lane: traci.lane.getLastStepHaltingNumber(lane) for lane in lanes}

    def update_traffic_light_program(self, junction_id, green_durations):
        """Install a fixed four-phase program with the given green times.

        Each green phase is followed by a yellow phase of ``YELLOW_DURATION``.

        Parameters:
          junction_id (str): Traffic light ID to reprogram.
          green_durations (sequence of int): Exactly four positive integers.

        Returns:
          int: Total cycle length (all greens plus all yellows).

        Raises:
          ValueError: If there are not exactly four durations, or any of them
            is not a positive integer.
        """
        if len(green_durations) != 4:
            raise ValueError("Must provide exactly 4 duration values")
        if not all(isinstance(x, int) and x > 0 for x in green_durations):
            raise ValueError("All durations must be positive integers")
        yellow_duration = self.YELLOW_DURATION
        # Green/yellow state pairs in cycle order; one character per link.
        states = [
            "GGgrrrrrrrrr", "yyyrrrrrrrrr",
            "rrrGGgrrrrrr", "rrryyyrrrrrr",
            "rrrrrrGGgrrr", "rrrrrryyyrrr",
            "rrrrrrrrrGGg", "rrrrrrrrryyy"
        ]
        phases = []
        for i, green_duration in enumerate(green_durations):
            for duration, state in (
                (green_duration, states[i * 2]),
                (yellow_duration, states[i * 2 + 1]),
            ):
                # min/max equal to the duration keeps the phase length fixed.
                phases.append(traci.trafficlight.Phase(
                    duration=duration,
                    state=state,
                    minDur=duration,
                    maxDur=duration
                ))
        logic = traci.trafficlight.Logic(
            programID="1",
            type=0,
            currentPhaseIndex=0,
            phases=phases
        )
        traci.trafficlight.setProgramLogic(junction_id, logic)
        return sum(green_durations) + len(green_durations) * yellow_duration

    def run_program_0(self):
        """Run the network's default signal program as a baseline.

        Simulates ``BASELINE_STEPS`` steps, summing the controlled-lane waiting
        times at every step and collecting every vehicle ID seen.

        Returns:
          tuple: ``(total_waiting_time, total_vehicles, waiting_time_per_vehicle)``.
        """
        self.start_simulation()

        total_waiting_time = 0.0
        seen_vehicles = set()
        try:
            for _ in range(self.BASELINE_STEPS):
                traci.simulationStep()
                total_waiting_time += sum(self.compute_waiting_time().values())
                seen_vehicles.update(traci.vehicle.getIDList())
        finally:
            # Always release the SUMO connection, even if a step fails.
            self.close_simulation()

        total_vehicles = len(seen_vehicles)
        waiting_time_per_vehicle = total_waiting_time / total_vehicles if total_vehicles > 0 else 0
        print(f"\nDefault Program Results:")
        print(f"Total Wait Time: {total_waiting_time:.1f}")
        print(f"Vehicles: {total_vehicles}")
        print(f"Wait Time per Vehicle: {waiting_time_per_vehicle:.1f}")
        return total_waiting_time, total_vehicles, waiting_time_per_vehicle

    def _update_lanes_flow(self, lane_ids, duration):
        """Advance the simulation once and track flow on several lanes together.

        All lanes are observed over the same ``duration`` steps, so the
        simulation advances by exactly ``duration`` regardless of how many
        lanes are given (including none).

        Parameters:
          lane_ids (iterable of str): Lanes to track; duplicates are ignored.
          duration (int): Number of simulation steps to execute.

        Returns:
          in_count (int): Vehicles that appeared on any tracked lane.
          out_count (int): Vehicles that disappeared from any tracked lane.
        """
        occupants = {
            lane: set(traci.lane.getLastStepVehicleIDs(lane))
            for lane in dict.fromkeys(lane_ids)
        }
        in_count = 0
        out_count = 0

        for _ in range(duration):
            traci.simulationStep()
            for lane, before_step in occupants.items():
                after_step = set(traci.lane.getLastStepVehicleIDs(lane))
                in_count += len(after_step - before_step)
                out_count += len(before_step - after_step)
                occupants[lane] = after_step

        return in_count, out_count

    def _update_lane_flow(self, lane_id, duration):
        """Track vehicle flow on a single lane over ``duration`` simulation steps.

        Parameters:
          lane_id (str): The ID of the lane to track.
          duration (int): Number of simulation steps to execute.

        Returns:
          in_count (int): Number of vehicles that entered the lane.
          out_count (int): Number of vehicles that exited the lane.
        """
        return self._update_lanes_flow([lane_id], duration)

    @staticmethod
    def _group_phase_lanes(controlled_lanes):
        """Split the controlled-lane list into four per-phase groups of unique lanes.

        Entries 0-2, 3-5, 6-8 and 9-11 are taken as the lanes of phases 0..3.
        NOTE(review): this assumes TraCI returns one entry per signal link in
        link order, matching the 12-character phase states -- confirm.
        Repeated lane IDs inside a group are collapsed, keeping first-seen order.
        """
        return [
            list(dict.fromkeys(controlled_lanes[start:start + 3]))
            for start in range(0, 12, 3)
        ]

    def apply_action_and_get_state(self, action):
        """Apply one action for a full signal cycle and derive the state.

        The cycle is simulated phase by phase (green plus yellow). During each
        phase, all lanes belonging to that phase are tracked over the same
        steps, so the simulation advances by exactly the returned cycle length.

        Parameters:
          action (int): The action index to apply.

        Returns:
          state (int): The discretized state based on flow ratios.
          needed_steps (int): Number of simulation steps that were executed.
        """
        green_durations = self.action_space[action]
        cycle_length = self.update_traffic_light_program(self.JUNCTION_ID, green_durations)

        controlled_lanes = traci.trafficlight.getControlledLanes(self.JUNCTION_ID)
        phase_lanes = self._group_phase_lanes(controlled_lanes)

        epsilon = 1e-6  # avoids division by zero when nothing entered
        flow_ratios = []
        for lanes, green_duration in zip(phase_lanes, green_durations):
            total_in, total_out = self._update_lanes_flow(
                lanes, green_duration + self.YELLOW_DURATION
            )
            flow_ratios.append(total_out / (total_in + epsilon))

        state = self._discretize_flow_ratios(flow_ratios)

        return state, cycle_length

    def _discretize_flow_ratios(self, flow_ratios, epsilon=1e-6):
        """Convert flow ratios into a single base-3 encoded state.

        Parameters:
          flow_ratios (list): One out/in ratio per phase.
          epsilon (float): Unused; kept for interface compatibility.

        Returns:
          state (int): ``sum(bin_i * 3**i)`` where bin is 2 for a ratio
            >= 1.1, 1 for a ratio >= 0.7, and 0 otherwise.
        """
        base = 3  # Three bins: 0, 1, 2
        state = 0
        for i, ratio in enumerate(flow_ratios):
            if ratio >= 1.1:
                bin_value = 2  # Efficient clearing
            elif ratio >= 0.7:
                bin_value = 1  # Moderate clearing
            else:
                bin_value = 0  # Inefficient clearing
            state += bin_value * (base ** i)
        return state

    def discretized_state(self, action, epsilon=1e-6):
        """Legacy helper: apply ``action`` for one cycle and return only the state.

        ``epsilon`` is unused and kept for compatibility with existing callers.
        """
        state, _ = self.apply_action_and_get_state(action)
        return state

    def get_num_actions(self):
        """Return the number of available actions."""
        return len(self.action_space)


In [None]:

"""Last modified code"""

import traci


class Environment:
    """TraCI wrapper around one SUMO traffic light ("J1") with four phases.

    Edge-based variant: the state is derived from vehicles entering and
    leaving one observed edge per phase over a full signal cycle.
    """

    # ID of the traffic light this environment controls.
    JUNCTION_ID = "J1"
    # Fixed yellow time appended after every green phase.
    YELLOW_DURATION = 4
    # Edge observed for phases 0..3, in phase order.
    # NOTE(review): presumably the incoming approach served by the matching
    # green phase -- confirm against the network file.
    PHASE_EDGES = ("E01", "E21", "E41", "E31")
    # Number of simulation steps in the fixed-program baseline run.
    BASELINE_STEPS = 8400

    def __init__(self, config, sumo_binary):
        """Store the SUMO launch settings and build the action table.

        Parameters:
          config (str): Path of the SUMO configuration passed via ``-c``.
          sumo_binary (str): SUMO executable to launch.
        """
        self.sumo_binary = sumo_binary
        self.sumo_config = config
        self.action_space = self.define_action_space()

    def define_action_space(self):
        """Return the mapping ``action index -> 4 green durations``.

        Every tuple is a split of 64 units of green time over the four phases
        in multiples of 8 (minimum 8), giving 35 actions.
        """
        return {
            0: (24, 8, 8, 24),   1: (24, 8, 24, 8),    2: (32, 8, 16, 8),    3: (40, 8, 8, 8),     4: (24, 8, 16, 16),
            5: (24, 24, 8, 8),   6: (8, 32, 16, 8),    7: (16, 32, 8, 8),    8: (32, 8, 8, 16),    9: (8, 32, 8, 16),
            10: (8, 24, 24, 8),  11: (32, 16, 8, 8),   12: (8, 16, 24, 16),  13: (24, 16, 8, 16),  14: (8, 24, 16, 16),
            15: (8, 16, 8, 32),  16: (8, 16, 16, 24),  17: (8, 16, 32, 8),   18: (24, 16, 16, 8),  19: (8, 24, 8, 24),
            20: (16, 8, 24, 16), 21: (8, 40, 8, 8),    22: (8, 8, 40, 8),    23: (16, 8, 16, 24),  24: (8, 8, 32, 16),
            25: (8, 8, 16, 32),  26: (16, 8, 8, 32),   27: (8, 8, 8, 40),    28: (8, 8, 24, 24),   29: (16, 8, 32, 8),
            30: (16, 24, 16, 8), 31: (16, 16, 24, 8),  32: (16, 16, 16, 16), 33: (16, 24, 8, 16),  34: (16, 16, 8, 24)
        }

    def start_simulation(self):
        """Launch SUMO with the stored configuration and connect TraCI."""
        traci.start([self.sumo_binary, "-c", self.sumo_config])

    def close_simulation(self):
        """Close the current TraCI connection."""
        traci.close()

    def compute_waiting_time(self):
        """Return ``{lane_id: waiting time}`` for the lanes controlled by J1."""
        lanes = traci.trafficlight.getControlledLanes(self.JUNCTION_ID)
        return {lane: traci.lane.getWaitingTime(lane) for lane in lanes}

    def compute_queue_length(self):
        """Return ``{lane_id: halting vehicle count}`` for the lanes controlled by J1.

        The count is what TraCI reports as halting during the last step.
        """
        lanes = traci.trafficlight.getControlledLanes(self.JUNCTION_ID)
        return {lane: traci.lane.getLastStepHaltingNumber(lane) for lane in lanes}

    def update_traffic_light_program(self, junction_id, green_durations):
        """Install a fixed four-phase program with the given green times.

        Each green phase is followed by a yellow phase of ``YELLOW_DURATION``.

        Parameters:
          junction_id (str): Traffic light ID to reprogram.
          green_durations (sequence of int): Exactly four positive integers.

        Returns:
          int: Total cycle length (all greens plus all yellows).

        Raises:
          ValueError: If there are not exactly four durations, or any of them
            is not a positive integer.
        """
        if len(green_durations) != 4:
            raise ValueError("Must provide exactly 4 duration values")
        if not all(isinstance(x, int) and x > 0 for x in green_durations):
            raise ValueError("All durations must be positive integers")
        yellow_duration = self.YELLOW_DURATION
        # Green/yellow state pairs in cycle order; one character per link.
        states = [
            "GGgrrrrrrrrr", "yyyrrrrrrrrr",
            "rrrGGgrrrrrr", "rrryyyrrrrrr",
            "rrrrrrGGgrrr", "rrrrrryyyrrr",
            "rrrrrrrrrGGg", "rrrrrrrrryyy"
        ]
        phases = []
        for i, green_duration in enumerate(green_durations):
            for duration, state in (
                (green_duration, states[i * 2]),
                (yellow_duration, states[i * 2 + 1]),
            ):
                # min/max equal to the duration keeps the phase length fixed.
                phases.append(traci.trafficlight.Phase(
                    duration=duration,
                    state=state,
                    minDur=duration,
                    maxDur=duration
                ))
        logic = traci.trafficlight.Logic(
            programID="1",
            type=0,
            currentPhaseIndex=0,
            phases=phases
        )
        traci.trafficlight.setProgramLogic(junction_id, logic)
        return sum(green_durations) + len(green_durations) * yellow_duration

    def run_program_0(self):
        """Run the network's default signal program as a baseline.

        Simulates ``BASELINE_STEPS`` steps, summing the controlled-lane waiting
        times at every step and collecting every vehicle ID seen.

        Returns:
          tuple: ``(total_waiting_time, total_vehicles, waiting_time_per_vehicle)``.
        """
        self.start_simulation()

        total_waiting_time = 0.0
        seen_vehicles = set()
        try:
            for _ in range(self.BASELINE_STEPS):
                traci.simulationStep()
                total_waiting_time += sum(self.compute_waiting_time().values())
                seen_vehicles.update(traci.vehicle.getIDList())
        finally:
            # Always release the SUMO connection, even if a step fails.
            self.close_simulation()

        total_vehicles = len(seen_vehicles)
        waiting_time_per_vehicle = total_waiting_time / total_vehicles if total_vehicles > 0 else 0
        print(f"\nDefault Program Results:")
        print(f"Total Wait Time: {total_waiting_time:.1f}")
        print(f"Vehicles: {total_vehicles}")
        print(f"Wait Time per Vehicle: {waiting_time_per_vehicle:.1f}")
        return total_waiting_time, total_vehicles, waiting_time_per_vehicle

    def _update_edge_flow(self, edge_id, duration):
        """Advance the simulation and track vehicle flow on one edge.

        Parameters:
          edge_id (str): The ID of the edge to track.
          duration (int): Number of simulation steps to execute.

        Returns:
          in_count (int): Number of vehicles that appeared on the edge.
          out_count (int): Number of vehicles that disappeared from the edge.
        """
        on_edge = set(traci.edge.getLastStepVehicleIDs(edge_id))
        in_count = 0
        out_count = 0

        for _ in range(duration):
            traci.simulationStep()
            now_on_edge = set(traci.edge.getLastStepVehicleIDs(edge_id))
            in_count += len(now_on_edge - on_edge)
            out_count += len(on_edge - now_on_edge)
            on_edge = now_on_edge

        return in_count, out_count

    @staticmethod
    def _flow_ratios(out_counts, in_counts, epsilon=1e-6):
        """Return out/in per phase; ``epsilon`` avoids division by zero."""
        return [out / (inp + epsilon) for out, inp in zip(out_counts, in_counts)]

    def apply_action_and_get_state(self, action):
        """Apply one action for a full signal cycle and derive the state.

        The cycle is simulated phase by phase. While phase ``k`` runs, the
        edge ``PHASE_EDGES[k]`` is tracked step by step (in and out); for the
        other edges only the vehicles that newly appeared over the phase are
        counted as inflow.

        Parameters:
          action (int): The action index to apply.

        Returns:
          state (int): The discretized state based on flow ratios.
          needed_steps (int): Number of simulation steps that were executed.
        """
        green_durations = self.action_space[action]
        cycle_length = self.update_traffic_light_program(self.JUNCTION_ID, green_durations)

        edge_set = list(self.PHASE_EDGES)
        phase_durations = [green + self.YELLOW_DURATION for green in green_durations]

        in_counts = [0] * len(edge_set)
        out_counts = [0] * len(edge_set)

        for active, duration in enumerate(phase_durations):
            others = [i for i in range(len(edge_set)) if i != active]

            # Snapshot the edges that are not tracked step by step.
            before_vehs = {
                i: set(traci.edge.getLastStepVehicleIDs(edge_set[i])) for i in others
            }

            in_count, out_count = self._update_edge_flow(edge_set[active], duration)
            in_counts[active] += in_count
            out_counts[active] += out_count

            # Vehicles present now but not in the snapshot arrived this phase.
            for i in others:
                after = set(traci.edge.getLastStepVehicleIDs(edge_set[i]))
                in_counts[i] += len(after - before_vehs[i])

        state = self._discretize_flow_ratios(self._flow_ratios(out_counts, in_counts))

        return state, cycle_length

    def _discretize_flow_ratios(self, flow_ratios, epsilon=1e-6):
        """Convert flow ratios into a single base-3 encoded state.

        Parameters:
          flow_ratios (list): One out/in ratio per phase.
          epsilon (float): Unused; kept for interface compatibility.

        Returns:
          state (int): ``sum(bin_i * 3**i)`` where bin is 2 for a ratio
            >= 1.1, 1 for a ratio >= 0.7, and 0 otherwise.
        """
        base = 3  # Three bins: 0, 1, 2
        state = 0
        for i, ratio in enumerate(flow_ratios):
            if ratio >= 1.1:
                bin_value = 2  # Efficient clearing
            elif ratio >= 0.7:
                bin_value = 1  # Moderate clearing
            else:
                bin_value = 0  # Inefficient clearing
            state += bin_value * (base ** i)
        return state

    def discretized_state(self, action, epsilon=1e-6):
        """Legacy helper: apply ``action`` for one cycle and return only the state.

        ``epsilon`` is unused and kept for compatibility with existing callers.
        """
        state, _ = self.apply_action_and_get_state(action)
        return state

    def get_num_actions(self):
        """Return the number of available actions."""
        return len(self.action_space)

In [None]:

"""First Simulation Code"""

import traci
import torch
import random

def run_simulation_reinforce(environment, agent, episodes, *, max_steps=8400, decay_factor=0.01):
    """
    Train ``agent`` with one REINFORCE policy update per simulated episode.

    For every episode the simulation is started, a random first action is
    applied to obtain the initial state, and then the agent repeatedly picks
    an action until ``max_steps`` simulation steps have been consumed. The
    collected trajectory is handed to ``agent.update_policy`` once at the end
    of the episode. The simulation is always closed, even if the episode
    fails part-way, so a crash does not leave a connection open.

    Parameters:
      environment: Object providing ``start_simulation``, ``close_simulation``,
        ``apply_action_and_get_state``, ``compute_waiting_time`` and
        ``action_space``.
      agent: Object providing ``optimizer`` (with ``param_groups``),
        ``num_states``, ``choose_action`` and ``update_policy``.
      episodes (int): Number of episodes to run.
      max_steps (int): Simulation-step budget per episode (keyword-only).
      decay_factor (float): Learning-rate decay coefficient; the rate used in
        episode ``e`` is ``initial_lr / (1 + decay_factor * e)`` (keyword-only).

    Returns:
      (total_waiting_times_rl, waiting_times_per_vehicle_rl): two lists with
      one entry per episode. The total is the sum, over all agent decisions,
      of the summed lane waiting times sampled right after each action.
    """
    total_waiting_times_rl = []
    waiting_times_per_vehicle_rl = []
    initial_alpha = agent.optimizer.param_groups[0]['lr']

    print("\nEpisode Results")
    print("Format: Episode | Total Wait Time | Vehicles | Wait Time per Vehicle")
    print("-" * 75)

    for episode in range(episodes):
        # Decay learning rate over time (only the first param group is touched)
        agent.optimizer.param_groups[0]['lr'] = initial_alpha / (1 + decay_factor * episode)
        environment.start_simulation()

        try:
            # Initialize episode metrics
            total_waiting_time = 0.0
            seen_vehicle_ids = set()

            # Execute a random first action to obtain the initial state.
            # TODO: consider a "critical" first action instead; the critical
            # flow ratio (vehicles_in / intersection capacity) still needs to
            # be read up on.
            first_action = random.choice(list(environment.action_space))
            current_state, _ = environment.apply_action_and_get_state(first_action)

            # Initialize trajectory storage for REINFORCE
            trajectory = []
            step = 0
            previous_waiting_time = sum(environment.compute_waiting_time().values())

            # Default budget is 8400 steps, i.e. 140 minutes if one step is one
            # simulated second (NOTE(review): step length comes from the SUMO
            # configuration, which is not visible here - confirm).
            while step < max_steps:
                # One-hot encode the current state for the agent
                state_one_hot = [0] * agent.num_states
                state_one_hot[current_state] = 1

                # Choose action using policy network
                action, action_prob = agent.choose_action(state_one_hot)

                # Apply action and get next state plus the steps it consumed
                next_state, needed_steps = environment.apply_action_and_get_state(action)

                # Measure waiting time after action
                current_waiting_time = sum(environment.compute_waiting_time().values())
                waiting_time_difference = current_waiting_time - previous_waiting_time

                # Update metrics
                total_waiting_time += current_waiting_time
                seen_vehicle_ids.update(traci.vehicle.getIDList())

                # Reward is the negative change in waiting time, with a 1.5x
                # bonus when the waiting time actually went down.
                reward = -waiting_time_difference
                if waiting_time_difference < 0:
                    reward *= 1.5

                # Add step to trajectory
                trajectory.append((state_one_hot, action, reward, action_prob))

                # Advance state, baseline waiting time and step counter
                current_state = next_state
                previous_waiting_time = current_waiting_time
                step += needed_steps

            # Update policy at the end of episode (before the simulation closes)
            agent.update_policy(trajectory)
        finally:
            # Always release the simulation so the next episode can start cleanly
            environment.close_simulation()

        # Calculate episode metrics
        total_vehicles = len(seen_vehicle_ids)
        waiting_time_per_vehicle = total_waiting_time / total_vehicles if total_vehicles > 0 else 0
        total_waiting_times_rl.append(total_waiting_time)
        waiting_times_per_vehicle_rl.append(waiting_time_per_vehicle)

        # Printing episode results
        print(f"#{episode+1:3d} | {total_waiting_time:12.1f} | {total_vehicles:3d} | {waiting_time_per_vehicle:8.1f}")

    return total_waiting_times_rl, waiting_times_per_vehicle_rl