In [1]:
import os
import sys
import optparse
from environment import environment
from sumolib import checkBinary  # Checks for the binary in environ vars
import traci
from agent import agents

In [2]:
# Make the Python tools shipped with SUMO importable, or stop if SUMO_HOME is not set.
# NOTE(review): sumolib and traci are imported in the cell above, i.e. before this
# path is added; on a fresh kernel those imports only succeed if the packages are
# already importable some other way. Consider running this setup before them.
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    # Re-running this cell in the same kernel must not pile up duplicate entries.
    if tools not in sys.path:
        sys.path.append(tools)
    print('added SUMO_HOME to tools directory')
else:
    sys.exit("please declare environment variable 'SUMO_HOME'")

 

added SUMO_HOME to tools directory


In [3]:
# Locate the headless and the GUI SUMO executables.
sumoBinaryNoGui = checkBinary('sumo')
sumoBinary = checkBinary('sumo-gui')

# Both command lines load the same scenario configuration.
_scenario_args = ["-c", "test.sumocfg"]

# Command lines passed to traci.start(): headless for training, GUI for visual runs.
nogui = [sumoBinaryNoGui] + _scenario_args + ['--no-warnings']
gui = [sumoBinary] + _scenario_args + ['--start', '--quit-on-end']

In [4]:
def default_run():
    """Run the scenario once in the GUI without agent control and print the waiting time.

    Traffic light '0' is set to phase 0 once at the start; afterwards the
    simulation is only stepped. After every step ``env.getWaitingTime()`` is
    added to a running total, which is printed when
    ``traci.simulation.getMinExpectedNumber()`` drops to zero.

    Returns:
        None. The total is only printed.
    """
    traci.start(gui)
    # Close the connection even if stepping fails; otherwise the next
    # traci.start() in this kernel finds the previous connection still open.
    try:
        total_time = 0
        env = environment()
        traci.trafficlight.setPhase('0', 0)
        while traci.simulation.getMinExpectedNumber() > 0:
            traci.simulationStep()
            total_time += env.getWaitingTime()
        print('total waiting time of defaul cyclic traffic light', total_time)
    finally:
        traci.close()

In [5]:
# contains TraCI control loop
def run(episodes, max_steps=5000, decision_interval=20, model_path='model.pt'):
    """Train the agent for ``episodes`` headless simulation runs and save the policy.

    A first, short-lived connection is opened only to construct the environment
    and the agent and to load existing weights. Each episode then starts a fresh
    simulation, applies the current action on every step, and every
    ``decision_interval`` steps selects a new action, stores the transition and
    trains the agent.

    Args:
        episodes: Number of episodes to run (integer).
        max_steps: Step cap per episode; reaching it is reported as a failure.
        decision_interval: Number of simulation steps between agent decisions.
        model_path: File the policy is loaded from and saved to.

    Returns:
        None. Progress (episode number, epsilon, memory size, total waiting
        time) is printed.

    Note:
        If an exception escapes, the TraCI connection that was open at that
        moment is left open; the calling cell is responsible for closing it.
        The policy is only saved after all episodes have finished.
    """
    # Build the environment and agent against a live simulation, then load weights.
    traci.start(nogui)
    env = environment()
    agent = agents(env.state_size(), env.action_size())
    agent.load_policy(model_path)
    traci.close()

    for ep in range(1, episodes + 1):
        print(ep)
        total_waiting_time = 0
        # Linear schedule; it passes 1.0 once ep > episodes - 3.1.
        # NOTE(review): demo() passes 1 for this argument, so larger values
        # presumably mean greedier action selection - confirm in agents.select_actions.
        epsilon = ep / (episodes - 3.1)
        traci.start(nogui)
        step = 0
        action = [0]
        print(epsilon)
        while traci.simulation.getMinExpectedNumber() > 0 and step < max_steps:
            traci.simulationStep()
            env.do_action(action)
            # NOTE(review): result unused; call kept in case getPhase() updates
            # environment state - drop it if it is a pure query.
            env.getPhase()
            total_waiting_time += env.getWaitingTime()

            if step % decision_interval == 0:
                state = env.getState()
                action = agent.select_actions(epsilon, state)
                nextState, reward = env.do_action(action)
                agent.add_memmory(state, action, nextState, reward)
                agent.train()
            step += 1
        print(len(agent.mem))
        # Only a run that actually hit the cap counts as a failure; the previous
        # ">= cap - 1" check also flagged episodes that finished one step earlier.
        if step >= max_steps:
            print('fail to complete the episode')
        print(total_waiting_time)
        traci.close()

    agent.save_policy(model_path)
    sys.stdout.flush()


In [6]:
# Baseline: run the scenario without agent control and print its total waiting time.
default_run()

{'0': ['GGGGGgrrrrrrrrgGGGGgrrrrrrrGr', 'GGGGGgrrrrrrrrgGGGGgrrrrrrrrr', 'yyyyygrrrrrrrryyyyygrrrrrrrrr', 'rrrrrGrrrrrrrrrrrrrGrrrrrrrrr', 'rrrrryrrrrrrrrrrrrryrrrrrrrrr', 'rrrrrrGGGGGGGgrrrrrrgGGGGggrG', 'rrrrrrGGGGGGGgrrrrrrgGGGGggrr', 'rrrrrryyyyyyyyrrrrrryyyyyyyrr']}
{'0': {'21_1', '21_3', '21_2', '31_2', '41_3', ':0_w2_0', '11_2', '11_1', '11_0', '41_2', '21_4', '11_3', '31_0', '31_1', ':0_w1_0', '41_1', '41_0', '21_0', '31_3'}}
total waiting time of defaul cyclic traffic light 10613983.0


In [7]:
import traceback

# Train for 20 episodes. On failure, show the traceback and release the TraCI
# connection so later cells can start a new simulation.
try:
    run(episodes=20)
except Exception:
    traceback.print_exc()
    try:
        traci.close()
    except traci.exceptions.FatalTraCIError:
        # run() failed while no connection was open (for example after it had
        # already closed the simulation); there is nothing left to close.
        pass


{'0': ['GGGGGgrrrrrrrrgGGGGgrrrrrrrGr', 'GGGGGgrrrrrrrrgGGGGgrrrrrrrrr', 'yyyyygrrrrrrrryyyyygrrrrrrrrr', 'rrrrrGrrrrrrrrrrrrrGrrrrrrrrr', 'rrrrryrrrrrrrrrrrrryrrrrrrrrr', 'rrrrrrGGGGGGGgrrrrrrgGGGGggrG', 'rrrrrrGGGGGGGgrrrrrrgGGGGggrr', 'rrrrrryyyyyyyyrrrrrryyyyyyyrr']}
{'0': {'21_1', '21_3', '21_2', '31_2', '41_3', ':0_w2_0', '11_2', '11_1', '11_0', '41_2', '21_4', '11_3', '31_0', '31_1', ':0_w1_0', '41_1', '41_0', '21_0', '31_3'}}
device available:  cpu
1
0.0591715976331361


  return F.mse_loss(input, target, reduction=self.reduction)


173
40522240.0
2
0.1183431952662722
328
29955916.0
3
0.1775147928994083
461
27468934.0
4
0.2366863905325444
612
32299604.0
5
0.2958579881656805
758
25427816.0
6
0.3550295857988166
931
35920834.0
7
0.4142011834319527
1001
38600179.0
8
0.4733727810650888
1001
40491664.0
9
0.5325443786982249
1001
41579626.0
10
0.591715976331361
1001
47418812.0
11
0.6508875739644971
1001
29286085.0
12
0.7100591715976332
1001
19594308.0
13
0.7692307692307693
1001
30676050.0
14
0.8284023668639054
1001
28832319.0
15
0.8875739644970415
1001
36737810.0
16
0.9467455621301776
1001
21385071.0
17
1.0059171597633136
1001
21939953.0
18
1.0650887573964498
1001
31593345.0
19
1.124260355029586
1001
73264525.0
20
1.183431952662722
1001
25092724.0


In [8]:
def demo(max_steps=5000, decision_interval=100, model_path='model.pt'):
    """Run the scenario once in the GUI under the saved policy and print the waiting time.

    The current action is applied on every simulation step. Every
    ``decision_interval`` steps the agent picks a new action with
    ``select_actions(1, state)`` and it is applied immediately. Whenever the
    phase read at a decision point differs from the last one printed, it is
    printed.

    Args:
        max_steps: Step cap; reaching it is reported as a failure.
        decision_interval: Number of simulation steps between agent decisions.
        model_path: File the policy is loaded from.

    Returns:
        None. The total waiting time is only printed.
    """
    traci.start(gui)
    # Close the connection even if loading the policy or stepping fails, so the
    # kernel is not left with a stale simulation.
    try:
        total_time = 0
        env = environment()
        agent = agents(env.state_size(), env.action_size())
        agent.load_policy(model_path)
        step = 0
        prevPhase = 0
        action = [0]
        while traci.simulation.getMinExpectedNumber() > 0 and step < max_steps:
            traci.simulationStep()
            env.do_action(action)
            step += 1
            total_time += env.getWaitingTime()
            if step % decision_interval == 0:
                state = env.getState()
                phase = env.getPhase()
                if phase != prevPhase:
                    print(phase)
                    prevPhase = phase
                action = agent.select_actions(1, state)
                env.do_action(action)
        # Only a run that actually hit the cap counts as a failure; the previous
        # ">= cap - 1" check also flagged runs that finished one step earlier.
        if step >= max_steps:
            print('fail to complete the episode')
        print('total waiting time of rl traffic light', total_time)
    finally:
        traci.close()

In [9]:
# Replay the scenario in the GUI under the saved policy and print its total waiting time.
demo()

{'0': ['GGGGGgrrrrrrrrgGGGGgrrrrrrrGr', 'GGGGGgrrrrrrrrgGGGGgrrrrrrrrr', 'yyyyygrrrrrrrryyyyygrrrrrrrrr', 'rrrrrGrrrrrrrrrrrrrGrrrrrrrrr', 'rrrrryrrrrrrrrrrrrryrrrrrrrrr', 'rrrrrrGGGGGGGgrrrrrrgGGGGggrG', 'rrrrrrGGGGGGGgrrrrrrgGGGGggrr', 'rrrrrryyyyyyyyrrrrrryyyyyyyrr']}
{'0': {'21_1', '21_3', '21_2', '31_2', '41_3', ':0_w2_0', '11_2', '11_1', '11_0', '41_2', '21_4', '11_3', '31_0', '31_1', ':0_w1_0', '41_1', '41_0', '21_0', '31_3'}}
device available:  cpu
7
2
5
2
5
2
5
1
4
1
4
2
5
1
0
1
fail to complete the episode
total waiting time of rl traffic light 59235297.0
