In [None]:
# Shared experiment settings, copied into every *Config class defined in this cell.
USE_SUMO_GUI = False  # becomes each config's `use_gui`
TOTAL_TIME = 900  # becomes each config's `num_seconds`; presumably simulated seconds per episode
NUM_SEEDS = 3  # number of `best_agents_run_{seed}.pkl` checkpoints the evaluation cell iterates over
NUM_EPISODES = 40  # not referenced in the visible cells; presumably training episodes per run -- TODO confirm


class DefaultConfig:
    """Baseline scenario settings: tailgating off, ``default_mode`` 31."""

    # Scenario identifier.
    name = "default"

    # Scenario-specific switches.
    tailgating = False
    # NOTE(review): presumably a SUMO/TraCI speed-mode bitmask (31 would be
    # "all checks enabled") -- confirm against traffic_tail.environment.
    default_mode = 31

    # Values shared by every scenario, taken from the notebook-level constants.
    use_gui = USE_SUMO_GUI
    num_seconds = TOTAL_TIME


class OverspeedConfig:
    """Overspeed scenario settings: tailgating off, ``default_mode`` 24."""

    # Scenario identifier.
    name = "overspeed"

    # Scenario-specific switches.
    tailgating = False
    # NOTE(review): presumably a SUMO/TraCI speed-mode bitmask; 24 differs from
    # the 31 used by the non-overspeed configs -- confirm the exact meaning
    # against traffic_tail.environment.
    default_mode = 24

    # Values shared by every scenario, taken from the notebook-level constants.
    use_gui = USE_SUMO_GUI
    num_seconds = TOTAL_TIME
    

class TailgatingConfig:
    """Tailgating scenario settings: tailgating on, ``default_mode`` 31."""

    # Scenario identifier.
    name = "tailgating"

    # Scenario-specific switches.
    tailgating = True
    # NOTE(review): presumably a SUMO/TraCI speed-mode bitmask (31 would be
    # "all checks enabled") -- confirm against traffic_tail.environment.
    default_mode = 31

    # Values shared by every scenario, taken from the notebook-level constants.
    use_gui = USE_SUMO_GUI
    num_seconds = TOTAL_TIME
    

class TailgatingOverspeedConfig:
    """Combined scenario settings: tailgating on, ``default_mode`` 24."""

    # Scenario identifier.
    name = "tailgating_overspeed"

    # Scenario-specific switches.
    tailgating = True
    # NOTE(review): presumably a SUMO/TraCI speed-mode bitmask; 24 differs from
    # the 31 used by the non-overspeed configs -- confirm the exact meaning
    # against traffic_tail.environment.
    default_mode = 24

    # Values shared by every scenario, taken from the notebook-level constants.
    use_gui = USE_SUMO_GUI
    num_seconds = TOTAL_TIME

In [None]:
import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import seaborn as sns

from glob import glob
from tqdm import tqdm
from traffic_tail.environment import create_env
from traffic_tail.trainer import SUMOTrainer


def run_episode(env, agent):
    """Roll out one complete episode and return the reward summed over all agents.

    Args:
        env: Multi-agent environment. ``reset()`` must return a dict of
            observations keyed by traffic-signal id; ``step(actions)`` must
            return ``(state, reward, done, info)`` where ``reward`` is a dict
            and ``done`` contains the ``"__all__"`` key.
        agent: Mapping from traffic-signal id to an object exposing
            ``act(observation)``.

    Returns:
        The sum of every per-signal reward over every step of the episode.

    The environment is always closed before this function returns or raises,
    so a failure inside ``reset``/``act``/``step`` does not leave the
    environment open.
    """
    total_reward = 0
    try:
        state = env.reset()
        # Same termination convention as the dict returned by ``env.step``.
        done = {"__all__": False}
        while not done["__all__"]:
            actions = {
                ts_id: agent[ts_id].act(observation)
                for ts_id, observation in state.items()
            }
            state, reward, done, _ = env.step(actions)
            total_reward += sum(reward.values())
    finally:
        # Release the environment even when the rollout fails part-way.
        env.close()
    return total_reward

In [None]:
# Plot every saved reward curve on one shared set of axes, one labelled line per file.
# glob() returns matches in arbitrary order; sorting keeps the line order (and
# therefore the colour assigned to each file) stable between runs and machines.
reward_curves = sorted(glob('results/*/rewards_*.npy'))

# Styling is loop-invariant, so apply it once instead of once per file.
sns.set()

reward_ax = None
for reward_curve_file in reward_curves:
    # Iterated below as reward_curve[run][step].
    reward_curve = np.load(reward_curve_file)

    # Long-format records: one row per (run, step) pair.
    data = [
        {'step': j, 'reward': reward, 'run': i}
        for i, curve in enumerate(reward_curve)
        for j, reward in enumerate(curve)
    ]

    df = pd.DataFrame(data)
    # Label each line with its source file so overlaid curves can be told apart.
    reward_ax = sns.lineplot(x='step', y='reward', data=df, label=reward_curve_file)

# Only draw a legend when at least one curve was plotted.
if reward_ax is not None:
    reward_ax.legend()
    

In [None]:
# Cross-evaluation: run the best saved agents of each training run in their own
# environment and in the other environments, then report the mean episode
# reward over all seeds.
default_config = DefaultConfig()
overspeed_config = OverspeedConfig()
tailgating_config = TailgatingConfig()

default_env = create_env(default_config)
tailgating_env = create_env(tailgating_config)
overspeeding_env = create_env(overspeed_config)

# Result lists are named <environment><agent>r: d = default, t = tailgating,
# o = overspeeding. `oor` and `dor` stay empty while the overspeeding agent is
# disabled below.
ddr, dtr, tdr, ttr, oor, odr, dor = ([] for _ in range(7))

for seed in tqdm(range(NUM_SEEDS)):
    default_checkpoint = f'results/default/best_agents_run_{seed}.pkl'
    tailgating_checkpoint = f'results/tailgating/best_agents_run_{seed}.pkl'
    trainer_default = SUMOTrainer(default_config).load(default_checkpoint)
    trainer_tailgating = SUMOTrainer(tailgating_config).load(tailgating_checkpoint)
    # trainer_overspeeding = SUMOTrainer(overspeed_config).load(f'results/overspeed/best_agents_run_{seed}.pkl')
    default_agent = trainer_default.agents
    tailgating_agent = trainer_tailgating.agents
    # overspeeding_agent = trainer_overspeeding.agents

    # (result list, environment, agents) triples, evaluated in this order.
    evaluation_plan = (
        (ddr, default_env, default_agent),
        (ttr, tailgating_env, tailgating_agent),
        (tdr, tailgating_env, default_agent),
        (dtr, default_env, tailgating_agent),
        # (oor, overspeeding_env, overspeeding_agent),
        (odr, overspeeding_env, default_agent),
        # (dor, default_env, overspeeding_agent),
    )
    for episode_returns, eval_env, eval_agent in evaluation_plan:
        episode_returns.append(run_episode(eval_env, eval_agent))

# Mean episode reward per (agent, environment) pairing.
evaluation_report = (
    ("Default Agent in Default Environment", ddr),
    ("Tailgating Agent in Tailgating Environment", ttr),
    ("Default Agent in Tailgating Environment", tdr),
    ("Tailgating Agent in Default Environment", dtr),
    # ("Overspeeding Agent in Overspeeding Environment", oor),
    # ("Overspeeding Agent in Default Environment", dor),
    ("Default Agent in Overspeeding Environment", odr),
)
for report_label, report_returns in evaluation_report:
    print(f"{report_label}: {sum(report_returns)/len(report_returns)}")

Creating tailgating environment.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      
Initializing RL agents. (This may take a while)
 Retrying in 1 seconds
Step #900.00 (0ms ?*RT. ?UPS, TraCI: 24ms, vehicles TOT 640 ACT 79 BUF 20)                
 Retrying in 1 seconds
Step #900.00 (1ms ~= 1000.00*RT, ~38000.00UPS, TraCI: 14ms, vehicles TOT 667 ACT 38 BUF 1)
 Retrying in 1 seconds
Step #900.00 (0ms ?*RT. ?UPS, TraCI: 15ms, vehicles TOT 689 ACT 29 BUF 0)                 
 Retrying in 1 seconds
Step #900.00 (1ms ~= 1000.00*RT, ~51000.00UPS, TraCI: 17ms, vehicles TOT 593 ACT 51 BUF 33
 Retrying in 1 seconds




Step #900.00 (0ms ?*RT. ?UPS, TraCI: 23ms, vehicles TOT 701 ACT 72 BUF 3)                 
Creating default environment.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 6ms, vehicles TOT 0 ACT 0 BUF 0)                      
Initializing RL agents. (This may take a while)
Creating tailgating environment.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      
Initializing RL agents. (This may take a while)
 Retrying in 1 seconds
Step #900.00 (0ms ?*RT. ?UPS, TraCI: 21ms, vehicles TOT 678 ACT 65 BUF 0)                 
 Retrying in 1 seconds
Step #900.00 (0ms ?*RT. ?UPS, TraCI: 20ms, vehicles TOT 690 ACT 55 BUF 2)                 
 Retrying in 1 seconds
Step #900.00 (0ms ?*RT. ?UPS, TraCI: 16ms, vehicles TOT 662 ACT 32 BUF 2)                 
 Retrying in 1 seconds
Step #900.00 (1ms ~= 1000.00*RT, ~96000.00UPS, TraCI: 29ms, vehicles TOT 620 ACT 96 BUF 21
 Retrying in 1 seconds




Step #900.00 (0ms ?*RT. ?UPS, TraCI: 23ms, vehicles TOT 662 ACT 59 BUF 0)                 
Creating default environment.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 5ms, vehicles TOT 0 ACT 0 BUF 0)                      
Initializing RL agents. (This may take a while)
Creating tailgating environment.
 Retrying in 1 seconds
Step #0.00 (0ms ?*RT. ?UPS, TraCI: 3ms, vehicles TOT 0 ACT 0 BUF 0)                      
Initializing RL agents. (This may take a while)
