In [1]:
# Notebook-wide settings; the cells below pass these to create_env / SUMOTrainer.

# Forwarded as `use_gui`; set to True to use sumo-gui.
USE_SUMO_GUI = True

# Simulation length in seconds, forwarded as `num_seconds`.
# 2 hours, e.g. a 7am - 9am rush hour.
TOTAL_TIME = 2 * 60 * 60

In [2]:
import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm
from traffic_tail.environment import create_env


# Random-policy rollout: each step, every traffic signal takes an action
# sampled from its own action space. Environment is created with
# tailgating=False.
env = create_env(
    tailgating=False,
    use_gui=USE_SUMO_GUI,
    num_seconds=TOTAL_TIME,
)

total_reward = 0
state = env.reset()
done = {"__all__": False}

try:
    # The context manager closes the progress bar when the loop ends or raises,
    # instead of leaving it open for the rest of the kernel session.
    with tqdm(total=TOTAL_TIME) as pbar:
        # done["__all__"] is the flag the environment sets when the episode is over.
        while not done["__all__"]:
            actions = {
                ts_id: env.action_spaces(ts_id).sample()
                for ts_id in env.ts_ids
            }
            state, reward, done, _ = env.step(actions)
            # `reward` is a per-signal mapping; accumulate the sum over all signals.
            total_reward += sum(reward.values())
            # Advance the bar by the amount of simulated time one step covers.
            pbar.update(env.delta_time)
finally:
    # Always release the environment, even if a step fails part-way through.
    env.close()

print(f"Total reward: {total_reward}")

 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 16ms, vehicles TOT 0 ACT 0 BUF 0)                     
 Retrying in 1 seconds


100%|██████████| 7200/7200 [01:04<00:00, 93.40it/s] 

Total reward: -124.61000000000013


100%|██████████| 7200/7200 [01:20<00:00, 93.40it/s]

In [3]:
import warnings
warnings.filterwarnings("ignore")

from tqdm import tqdm
from traffic_tail.environment import create_env


# Random-policy rollout: each step, every traffic signal takes an action
# sampled from its own action space. Environment is created with
# tailgating=True.
env = create_env(
    tailgating=True,
    use_gui=USE_SUMO_GUI,
    num_seconds=TOTAL_TIME,
)

total_reward = 0
state = env.reset()
done = {"__all__": False}

try:
    # The context manager closes the progress bar when the loop ends or raises,
    # instead of leaving it open for the rest of the kernel session.
    with tqdm(total=TOTAL_TIME) as pbar:
        # done["__all__"] is the flag the environment sets when the episode is over.
        while not done["__all__"]:
            actions = {
                ts_id: env.action_spaces(ts_id).sample()
                for ts_id in env.ts_ids
            }
            state, reward, done, _ = env.step(actions)
            # `reward` is a per-signal mapping; accumulate the sum over all signals.
            total_reward += sum(reward.values())
            # Advance the bar by the amount of simulated time one step covers.
            pbar.update(env.delta_time)
finally:
    # Always release the environment, even if a step fails part-way through.
    env.close()

print(f"Total reward: {total_reward}")

 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 17ms, vehicles TOT 0 ACT 0 BUF 0)                     
 Retrying in 1 seconds


100%|██████████| 7200/7200 [01:12<00:00, 99.90it/s] 


Total reward: -605.1600000000003




In [None]:
from traffic_tail.trainer import SUMOTrainer

# Build the trainer with the same GUI flag and episode length used by the
# rollout cells. NOTE(review): the meaning of env="default" is defined inside
# SUMOTrainer and is not visible from this notebook -- confirm.
trainer = SUMOTrainer(env="default", use_gui=USE_SUMO_GUI, num_seconds=TOTAL_TIME)

# 30 episodes; the original note describes this as 30 days of rush hour traffic.
trainer.train(episodes=30)

# Persist the result; later cells read `trainer.agents` from this same object.
trainer.save("results/pretrained_model.pkl")

In [None]:
# Rollout driven by the agents held by `trainer`, in an environment created
# with tailgating=False.
agents = trainer.agents
env = create_env(
    tailgating=False,
    use_gui=USE_SUMO_GUI,
    num_seconds=TOTAL_TIME,
)

total_reward = 0
state = env.reset()
done = {"__all__": False}

try:
    # The context manager closes the progress bar when the loop ends or raises,
    # instead of leaving it open for the rest of the kernel session.
    with tqdm(total=TOTAL_TIME) as pbar:
        # done["__all__"] is the flag the environment sets when the episode is over.
        while not done["__all__"]:
            # Only signals present in the current `state` mapping are asked to act;
            # each one's agent receives that signal's own observation.
            actions = {
                ts_id: agents[ts_id].act(observation)
                for ts_id, observation in state.items()
            }
            state, reward, done, _ = env.step(actions)
            # `reward` is a per-signal mapping; accumulate the sum over all signals.
            total_reward += sum(reward.values())
            # Advance the bar by the amount of simulated time one step covers.
            pbar.update(env.delta_time)
finally:
    # Always release the environment, even if a step fails part-way through.
    env.close()

print(f"Total reward: {total_reward}")

In [None]:
# Rollout driven by the agents held by `trainer`, in an environment created
# with tailgating=True.
agents = trainer.agents
env = create_env(
    tailgating=True,
    use_gui=USE_SUMO_GUI,
    num_seconds=TOTAL_TIME,
)

total_reward = 0
state = env.reset()
done = {"__all__": False}

try:
    # Bar total is the requested simulation length (the `num_seconds` passed
    # above), the same total the other rollout cells use, rather than an
    # attribute read back from the environment.
    # The context manager closes the bar when the loop ends or raises.
    with tqdm(total=TOTAL_TIME) as pbar:
        # done["__all__"] is the flag the environment sets when the episode is over.
        while not done["__all__"]:
            # Only signals present in the current `state` mapping are asked to act;
            # each one's agent receives that signal's own observation.
            actions = {
                ts_id: agents[ts_id].act(observation)
                for ts_id, observation in state.items()
            }
            state, reward, done, _ = env.step(actions)
            # `reward` is a per-signal mapping; accumulate the sum over all signals.
            total_reward += sum(reward.values())
            # Advance the bar by the amount of simulated time one step covers.
            pbar.update(env.delta_time)
finally:
    # Always release the environment, even if a step fails part-way through.
    env.close()

print(f"Total reward: {total_reward}")