In [1]:
# --- Experiment configuration -------------------------------------------------
# Forwarded as `use_gui`; True launches sumo-gui.
USE_SUMO_GUI = True
# Forwarded as `num_seconds`: 30 minutes expressed in seconds.
TOTAL_TIME = 30 * 60
# How many times each training run / evaluation sweep is repeated.
NUM_SEEDS = 5
# Forwarded as `episodes` to every `SUMOTrainer.train` call.
NUM_EPISODES = 100

In [2]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import seaborn as sns
from tqdm import tqdm
from traffic_tail.environment import create_env
from traffic_tail.trainer import SUMOTrainer


def run_episode(env, agent):
    """Play one episode to completion and return the summed reward.

    Args:
        env: Environment exposing ``reset()``, ``step(actions)`` and
            ``close()``. ``reset()`` returns a dict of observations keyed by
            ``ts_id``; ``step`` returns ``(state, reward, done, info)`` where
            ``reward`` is a dict of numbers and ``done`` carries an
            ``"__all__"`` flag that ends the episode.
        agent: Mapping from ``ts_id`` to an object with an ``act(observation)``
            method. Every key present in the state must be present here.

    Returns:
        The sum of every per-key reward over every step of the episode.

    Raises:
        Whatever ``env.reset`` / ``env.step`` / ``agent[...].act`` raise. The
        environment is closed before the exception propagates.
    """
    total_reward = 0
    try:
        state = env.reset()
        done = {"__all__": False}
        while not done["__all__"]:
            actions = {
                ts_id: agent[ts_id].act(observation)
                for ts_id, observation in state.items()
            }
            state, reward, done, _ = env.step(actions)
            total_reward += sum(reward.values())
    finally:
        # Always release the environment, even when a step fails mid-episode,
        # so a crashed run does not leave the simulator connection open.
        env.close()
    return total_reward

In [None]:
# Train on the 'default' environment NUM_SEEDS times, building a new
# SUMOTrainer for every run and keeping that run's reward history.
reward_curve_default = []
for _ in range(NUM_SEEDS):
    trainer_default = SUMOTrainer(
        env='default',
        use_gui=USE_SUMO_GUI,
        num_seconds=TOTAL_TIME,
    )
    trainer_default.train(episodes=NUM_EPISODES)
    reward_curve_default += [trainer_default.rewards]

# Flatten the per-run curves into long-format records (one row per run/step)
# so seaborn can aggregate across runs at each step.
data = [
    {'step': step_idx, 'reward': step_reward, 'run': run_idx}
    for run_idx, curve in enumerate(reward_curve_default)
    for step_idx, step_reward in enumerate(curve)
]

df = pd.DataFrame(data)
sns.set()
sns.lineplot(data=df, x='step', y='reward')

In [None]:
# Train on the 'tailgating' environment NUM_SEEDS times. Each run builds a new
# SUMOTrainer, loads the saved agents from the default run, then trains.
# NOTE(review): 'results/default/best_agents.pkl' is presumably written by the
# default training cell — confirm it exists before running this cell.
reward_curve_tailgating = []
for _ in range(NUM_SEEDS):
    trainer_tailgating = SUMOTrainer(
        env='tailgating',
        use_gui=USE_SUMO_GUI,
        num_seconds=TOTAL_TIME,
    )
    trainer_tailgating.load('results/default/best_agents.pkl')
    trainer_tailgating.train(episodes=NUM_EPISODES)
    # Record THIS trainer's rewards; the previous code appended
    # `trainer_default.rewards`, so the plot showed the wrong curve.
    reward_curve_tailgating.append(trainer_tailgating.rewards)

# Long-format records (one row per run/step) for seaborn's aggregation.
data = []
for i, reward_curve in enumerate(reward_curve_tailgating):
    for j, reward in enumerate(reward_curve):
        data.append({
            'step': j,
            'reward': reward,
            'run': i,
        })

df = pd.DataFrame(data)
sns.set()
ax = sns.lineplot(x='step', y='reward', data=df)
# Title the figure so it can be told apart from the other training plots.
ax.set_title('Training reward: tailgating environment');

In [None]:
# Train on the 'overspeeding' environment NUM_SEEDS times. Each run builds a
# new SUMOTrainer, loads the saved agents from the default run, then trains.
# NOTE(review): 'results/default/best_agents.pkl' is presumably written by the
# default training cell — confirm it exists before running this cell.
reward_curve_overspeeding = []
for _ in range(NUM_SEEDS):
    # Bound to its own name: the previous code reused `trainer_tailgating`
    # here, silently overwriting the tailgating trainer.
    trainer_overspeeding = SUMOTrainer(
        env='overspeeding',
        use_gui=USE_SUMO_GUI,
        num_seconds=TOTAL_TIME,
    )
    trainer_overspeeding.load('results/default/best_agents.pkl')
    trainer_overspeeding.train(episodes=NUM_EPISODES)
    # Record THIS trainer's rewards; the previous code appended
    # `trainer_default.rewards`, so the plot showed the wrong curve.
    reward_curve_overspeeding.append(trainer_overspeeding.rewards)

# Long-format records (one row per run/step) for seaborn's aggregation.
data = []
for i, reward_curve in enumerate(reward_curve_overspeeding):
    for j, reward in enumerate(reward_curve):
        data.append({
            'step': j,
            'reward': reward,
            'run': i,
        })

df = pd.DataFrame(data)
sns.set()
ax = sns.lineplot(x='step', y='reward', data=df)
# Title the figure so it can be told apart from the other training plots.
ax.set_title('Training reward: overspeeding environment');

In [3]:
# Cross-evaluation: run every trained agent set in its own environment and in
# the default environment (and the default agents in every environment), then
# report the mean episode reward over NUM_SEEDS repetitions.

# One evaluation environment per scenario.
default_env = create_env(tailgating=False, use_gui=USE_SUMO_GUI, num_seconds=TOTAL_TIME)
tailgating_env = create_env(tailgating=True, use_gui=USE_SUMO_GUI, num_seconds=TOTAL_TIME)
# NOTE(review): default_mode=24 is what distinguishes the overspeeding scenario
# here; its exact meaning is defined by create_env — confirm and name it.
overspeeding_env = create_env(tailgating=False, use_gui=USE_SUMO_GUI, num_seconds=TOTAL_TIME, default_mode=24)

# Construct each trainer first and call load() as a separate statement, the
# same way the training cells do, so each name is bound to the trainer itself
# rather than to whatever load() happens to return.
trainer_default = SUMOTrainer(
    env='default',
    use_gui=USE_SUMO_GUI,
    num_seconds=TOTAL_TIME,
)
trainer_default.load('results/default/best_agents.pkl')

trainer_tailgating = SUMOTrainer(
    env='tailgating',
    use_gui=USE_SUMO_GUI,
    num_seconds=TOTAL_TIME,
)
trainer_tailgating.load('results/tailgating/best_agents.pkl')

trainer_overspeeding = SUMOTrainer(
    env='overspeeding',
    use_gui=USE_SUMO_GUI,
    num_seconds=TOTAL_TIME,
)
# The previous code loaded the tailgating checkpoint here, so every
# "Overspeeding Agent" number actually measured the tailgating agents.
# NOTE(review): path assumes checkpoints live under results/<env>/ like the
# other two — confirm the overspeeding run saved to this location.
trainer_overspeeding.load('results/overspeeding/best_agents.pkl')

default_agent = trainer_default.agents
tailgating_agent = trainer_tailgating.agents
overspeeding_agent = trainer_overspeeding.agents

# Result lists are named <env><agent>r with d=default, t=tailgating,
# o=overspeeding; e.g. `tdr` = tailgating environment, default agent.
ddr = []
dtr = []
tdr = []
ttr = []
oor = []
odr = []
dor = []

for _ in tqdm(range(NUM_SEEDS)):
    ddr.append(run_episode(default_env, default_agent))
    ttr.append(run_episode(tailgating_env, tailgating_agent))
    tdr.append(run_episode(tailgating_env, default_agent))
    dtr.append(run_episode(default_env, tailgating_agent))
    oor.append(run_episode(overspeeding_env, overspeeding_agent))
    odr.append(run_episode(overspeeding_env, default_agent))
    dor.append(run_episode(default_env, overspeeding_agent))

# Mean total episode reward for each (agent, environment) pairing.
print(f"Default Agent in Default Environment: {sum(ddr)/len(ddr)}")
print(f"Tailgating Agent in Tailgating Environment: {sum(ttr)/len(ttr)}")
print(f"Default Agent in Tailgating Environment: {sum(tdr)/len(tdr)}")
print(f"Tailgating Agent in Default Environment: {sum(dtr)/len(dtr)}")
print(f"Overspeeding Agent in Overspeeding Environment: {sum(oor)/len(oor)}")
print(f"Overspeeding Agent in Default Environment: {sum(dor)/len(dor)}")
print(f"Default Agent in Overspeeding Environment: {sum(odr)/len(odr)}")

Using custom environment with tailgating behavior.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 12ms, vehicles TOT 0 ACT 0 BUF 0)                     
Using custom environment with tailgating behavior.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 15ms, vehicles TOT 0 ACT 0 BUF 0)                     
Using default SUMO environment.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 16ms, vehicles TOT 0 ACT 0 BUF 0)                     
Initializing RL agents. (This may take a while)
Using custom environment with tailgating behavior.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 15ms, vehicles TOT 0 ACT 0 BUF 0)                     
Initializing RL agents. (This may take a while)


  0%|          | 0/10 [00:00<?, ?it/s]

 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds
 Retrying in 1 seconds


 10%|█         | 1/10 [02:10<19:37, 130.83s/it]

 Retrying in 1 seconds


 10%|█         | 1/10 [02:41<24:15, 161.71s/it]


FatalTraCIError: Connection closed by SUMO.