In [523]:
import gymnasium as gym
import numpy as np
from gym.spaces import Discrete, Box
import pygame


import os
import sys
import traci
import traci.constants as tc

if 'SUMO_HOME' in os.environ:
    sys.path.append(os.path.join(os.environ['SUMO_HOME'], 'tools'))
import traci

#load sumo configuration
sumoBinary = "sumo"
sumoCmd = [sumoBinary, "-c", "002.sumocfg"]

In [656]:
class Bus_holding_env(gym.Env):
    def __init__(self, render_mode=None, sumocfg="002.sumocfg"):
        self.bus = {0:'t_0',
                    1:'t_1',
                    2:'t_2',
                    3:'t_3'}
        self.busstop = {0:'bs_1',
                        1:'bs_2',
                        2:'bs_3',
                        3:'bs_4'}
        self.Compliane_score={0:1,
                              1:2/3,
                              2:1
        }
        self.loop = {0:'loop_1',
                        1:'loop_2',
                        2:'loop_3',
                        3:'loop_4'}
        self.control_busstop={0:'bs_1',
                                1:'bs_3'}
        self.control_bustop_index_convert ={0:0,
                                            1:2}
        
        self.control_bus={0:'t_1',
                        1:'t_2',
                        2:'t_3'}
        
        self.control_bus_index_convert ={0:1,
                                            1:2,
                                            2:3}

        self.observe_busstop=[1,2,3] 
        
        n,m=len(self.bus), len(self.busstop)
        
        self.max_headway=50
        self.initial_headway=20
        
        self.time_slice=15
        self.max_timesetp=400

        self.control_busstop_duration=20


        # self.action_space = gym.spaces.Discrete(6) # duration of holding of bus1, bus2,bus3
        self.action_space = gym.spaces.Discrete(20)
        

        self.observation_space=gym.spaces.Dict(
            {
                "headway": gym.spaces.Box(0, high=self.max_headway,  shape=(n, m), dtype=int),
                "target": gym.spaces.MultiDiscrete([3,2])
            }
        )
        self.decision_finish= np.full((len(self.control_bus),len(self.control_busstop)), False, dtype=bool)


        self.render_mode = render_mode

        #arrival time
        self.arrival_times = np.ones((n,m))*np.nan
        self.arrival_check = np.full((n,m), False, dtype=bool)

        #headway of bus [1,2,3] at bus_stop[1,2,3,4]
        self.headway = np.ones((n,m))*self.max_headway
        self.headway[0]=self.initial_headway

        #target=[target bus , target bus stop]
        self.target=np.array([0,0])


        #sumo start to simulate 
        self.sumocfg=sumocfg
        self.sumoCmd=["sumo", "-c",self.sumocfg]#, "--no-warnings" 

        traci.start(self.sumoCmd)
        print("check first start")



    def render(self):
        return None
    
    def reset(self, seed=42):

        traci.close()
        print("check reset close")
        traci.start(self.sumoCmd)
        print("check second start")


        # traci.start(self.sumoCmd)

        """
        sumo simulation run until the second bus reach the first control stop 
        update the arrival statue
        """
        found_ind=False
        while not found_ind :
            traci.simulationStep()

            #check arrival time of first two bus at first bus stop
            for bus_i in [0,1]:
                for busstop_j in [0]:
                    # check if not arrival 
                    if not self.arrival_check[bus_i][busstop_j]:
                        #check if bus_i arrive bus_j
                        loop_data=traci.inductionloop.getVehicleData(loopID=self.loop[busstop_j])
                        if loop_data!=[]:
                            if self.bus[bus_i] in loop_data[0][0]:
                                self.arrival_times[bus_i][busstop_j]=int(loop_data[0][2])# record the arrival time
                                self.arrival_check[bus_i][busstop_j]=True #update the arrival status

            if self.arrival_check[1][0] :
                found_ind=True
        
        self.target=np.array([0,0]) # the  holding decision should made on the bus[1] at stop[1]

        #update headway
        self.update_headway()
        
        observation = self._get_obs()
        info = {}

        return observation,info

    def step(self, action):
        # action is duration of holding
        stop_duration=10+ 3* action* self.Compliane_score[self.target[0]]
        traci.vehicle.setBusStop(vehID=self.control_bus[self.target[0]]
                                 ,stopID=self.control_busstop[self.target[1]]
                                 , duration=stop_duration)
        
        #update finished decision 
        self.decision_finish[self.target[0], self.target[1]]=True

        if self.decision_finish[-1][-1]: #last decision is finished
            terminated = True

        else:
            terminated = False
        
        
        # if not terminate find the next decision step 
        if not terminated:
            next_decision_found=False
            while not next_decision_found:
                traci.simulationStep()
                self.arrival_update()
                for row in np.argwhere(self.decision_finish==False):
                    i=self.control_bus_index_convert[row[0]]
                    j=self.control_bustop_index_convert[row[1]]
                    if self.arrival_check[i,j]==True:
                        self.target=row
                        self.update_headway()
                        next_decision_found=True


        observation=self._get_obs()


        #reward:
        if terminated:
            # simulate until last bus arrive last stop 
            while not np.all(self.arrival_check):
                traci.simulationStep()
                self.arrival_update()
        
            # calculate the headway 
            E_h= np.mean(self.headway, axis=0)
            Var_h=np.var(self.headway, axis=0)
            reward = - sum([ E_h[j]/2*(1+Var_h[j]/E_h[j]**2) for j in range(len(E_h))]) - action
            

        else:
            reward=-action


        info={}

        return observation, reward, terminated, False, info

        
    def sumo_finish(self):
        """check if sumo terminate, which means no vehicle left in the network"""
        end = True if traci.simulation.getMinExpectedNumber()==0 else False
        return end

    
    def update_headway(self):
        previous_arrivals = np.roll(self.arrival_times , shift=1, axis=0)
        valid = ~np.isnan(self.arrival_times) & ~np.isnan(self.arrival_times)
        self.headway[1:, :] = np.where(valid[1:, :], self.arrival_times[1:, :] - previous_arrivals[1:, :], 50)


    def _get_obs(self):
        headway = self.headway
        target = self.target  # target bus, and stop 
        obs= {"headway":headway,
              "target":target}
        return obs
    
    def arrival_update(self):
        """record/ update each vehicle arrive stop time"""
        #get each vehicle first unarrival stop 
        for i in range(self.arrival_check.shape[0]):
            unarriaval_bs = np.argwhere( self.arrival_check[i]==False).tolist()
            if unarriaval_bs!=[]:
                bus_stop=unarriaval_bs[0][0]
                loop_data=traci.inductionloop.getVehicleData(loopID=self.loop[bus_stop]) # The first unarrival stop for bus i
                if loop_data!=[]:
                    if self.bus[i] in loop_data[0][0]:
                        self.arrival_times[i][bus_stop]=int(loop_data[0][2])# record the arrival time
                        self.arrival_check[i][bus_stop]=True #update the arrival status
                

    def close(self):
        traci.close()
        

In [657]:
env=Bus_holding_env()
env.reset()

 Retrying in 1 seconds
check first start
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 0 ACT 0 BUF 0)                      
check reset close
 Retrying in 1 seconds
check second start


({'headway': array([[20., 20., 20., 20.],
         [22., 50., 50., 50.],
         [50., 50., 50., 50.],
         [50., 50., 50., 50.]]),
  'target': array([0, 0])},
 {})

In [658]:
env.close()

Step #78.00 (0ms ?*RT. ?UPS, TraCI: 3900ms, vehicles TOT 2 ACT 2 BUF 0)                   


In [659]:
import gymnasium as gym

from stable_baselines3 import DQN

env = Bus_holding_env()

model = DQN("MultiInputPolicy", env, verbose=1)
model.learn(total_timesteps=10000, log_interval=4)
model.save("dqn_bus_holding")



 Retrying in 1 seconds
check first start
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 44ms, vehicles TOT 0 ACT 0 BUF 0)                     
check reset close
 Retrying in 1 seconds




check second start
Step #364.00 (0ms ?*RT. ?UPS, TraCI: 1ms, vehicles TOT 4 ACT 1 BUF 0)                     
check reset close
 Retrying in 1 seconds
check second start
Step #1.00 (0ms ?*RT. ?UPS, TraCI: 2ms, vehicles TOT 1 ACT 1 BUF 0)                       
check reset close
 Retrying in 1 seconds
check second start
Step #1.00 (0ms ?*RT. ?UPS, TraCI: 3ms, vehicles TOT 1 ACT 1 BUF 0)                       
check reset close
 Retrying in 1 seconds
check second start
Step #1.00 (0ms ?*RT. ?UPS, TraCI: 3ms, vehicles TOT 1 ACT 1 BUF 0)                       
check reset close
 Retrying in 1 seconds
check second start
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 2.25     |
|    ep_rew_mean      | -121     |
|    exploration_rate | 0.991    |
| time/               |          |
|    episodes         | 4        |
|    fps              | 1        |
|    time_elapsed     | 5        |
|    total_timesteps  | 9        |
---------------------------

In [664]:
env=Bus_holding_env()

 Retrying in 1 seconds
check first start


In [665]:
#
del model # remove to demonstrate saving and loading

model = DQN.load("dqn_bus_holding")

obs, info = env.reset()
while True:
    action, _states = model.predict(obs, deterministic=True)
    obs, reward, terminated, truncated, info = env.step(action)
    print(obs)
    print(action)
    if terminated or truncated:
        obs, info = env.reset()

Step #0.00 (0ms ?*RT. ?UPS, TraCI: 1718ms, vehicles TOT 0 ACT 0 BUF 0)                   
check reset close
 Retrying in 1 seconds
check second start
{'headway': array([[20., 20., 20., 20.],
       [22., 14., 50., 50.],
       [57., 50., 50., 50.],
       [50., 50., 50., 50.]]), 'target': array([1, 0])}
0
{'headway': array([[20., 20., 20., 20.],
       [22., 14., 50., 50.],
       [57., 50., 50., 50.],
       [22., 50., 50., 50.]]), 'target': array([2, 0])}
0
{'headway': array([[20., 20., 20., 20.],
       [22., 14.,  3., 50.],
       [57., 66., 50., 50.],
       [22., 50., 50., 50.]]), 'target': array([0, 1])}
0
{'headway': array([[20., 20., 20., 20.],
       [22., 14.,  3.,  7.],
       [57., 66., 79., 50.],
       [22., 14., 50., 50.]]), 'target': array([1, 1])}
0
{'headway': array([[20., 20., 20., 20.],
       [22., 14.,  3.,  7.],
       [57., 66., 79., 50.],
       [22., 14.,  3., 50.]]), 'target': array([2, 1])}
3
{'headway': array([[20., 20., 20., 20.],
       [22., 14.,  3.,  

KeyboardInterrupt: 

In [417]:
for i in range(3):
    traci.simulationStep()
traci.vehicle.setBusStop(stopID="bs_1",vehID="t_1", duration=37)

In [418]:
time_step=0
while time_step<40:
    traci.simulationStep()
    print(traci.simulation.getTime(), traci.busstop.getVehicleIDs("bs_1"))
    time_step+=1


82.0 ('t_1',)
83.0 ('t_1',)
84.0 ('t_1',)
85.0 ('t_1',)
86.0 ('t_1',)
87.0 ('t_1',)
88.0 ('t_1',)
89.0 ('t_1',)
90.0 ('t_1',)
91.0 ('t_1',)
92.0 ('t_1',)
93.0 ('t_1',)
94.0 ('t_1',)
95.0 ('t_1',)
96.0 ('t_1',)
97.0 ('t_1',)
98.0 ('t_1',)
99.0 ('t_1',)
100.0 ('t_1',)
101.0 ('t_1',)
102.0 ('t_1',)
103.0 ('t_1',)
104.0 ('t_1',)
105.0 ('t_1',)
106.0 ('t_1',)
107.0 ('t_1',)
108.0 ('t_1',)
109.0 ('t_1',)
110.0 ('t_1',)
111.0 ('t_1',)
112.0 ('t_1',)
113.0 ('t_1',)
114.0 ('t_1',)
115.0 ('t_1',)
116.0 ('t_1',)
117.0 ('t_1',)
118.0 ('t_1',)
119.0 ()
120.0 ()
121.0 ()


In [291]:
traci.start(sumoCmd) 

""" terminate critia"""
def finish():
    end= True if traci.simulation.getMinExpectedNumber()==0 else False
    return end

"""restart"""
def restart():
    traci.close()
    return None



while traci.simulation.getTime()<180 and not finish():
    traci.simulationStep()
    if len(traci.busstop.getVehicleIDs(target_stop))!=0:
        print(step, traci.busstop.getVehicleIDs(target_stop))
    if "t_0" in traci.busstop.getVehicleIDs("bs_1"):
        print("target bus at target stop, step=", traci.simulation.getTime())

        break

#traci.close()

 Retrying in 1 seconds
target bus at target stop, step= 61.0
