# Grid2x2 using RLlib

In [1]:
import csv
import os

import ray
from ray.rllib.env.wrappers.multi_agent_env_compatibility import MultiAgentEnvCompatibility
from ray.tune.registry import register_env

from envs import MultiAgentSumoEnv
from observation import Grid2x2ObservationFunction
from reward_functions import combined_reward

In [2]:
import random
import numpy as np
import torch

# Identifier of this experiment run; it is embedded in the checkpoint, output and log paths.
TEST_NUM = 7
SEED = 23423  # default SUMO seed no.

# Seed the Python, NumPy and PyTorch RNGs with the same value used for SUMO.
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

ENV_NAME = "grid2x2"

# Stop early if this test number already has a checkpoint directory, so a
# previous run's checkpoints are not mixed with (or overwritten by) this one.
_checkpoint_dir = os.path.join("ray_checkpoints", ENV_NAME, f"test_{TEST_NUM}")
assert not os.path.exists(_checkpoint_dir), (
    f"Checkpoint directory {_checkpoint_dir!r} already exists; "
    "bump TEST_NUM or remove the directory before re-running."
)

In [3]:
def train_env_creator(args):
    """Create the SUMO multi-agent environment used for training.

    Args:
        args: Environment config passed in by the caller (RLlib passes its
            env config here). It is not read by this function.

    Returns:
        A ``MultiAgentSumoEnv`` for the ``ENV_NAME`` network, running for
        3600 simulated seconds with ``combined_reward`` and the notebook seed.
    """
    scenario_dir = os.path.join("nets", ENV_NAME)

    # Report the first two default arguments of combined_reward: the
    # congestion reward function and its coefficient (alpha).
    reward_defaults = combined_reward.__defaults__
    print(reward_defaults[0].__name__, reward_defaults[1])

    return MultiAgentSumoEnv(
        net_file=os.path.join(scenario_dir, f"{ENV_NAME}.net.xml"),
        route_file=os.path.join(scenario_dir, f"{ENV_NAME}.rou.xml"),
        num_seconds=3600,
        reward_fn=combined_reward,
        sumo_seed=SEED,
        observation_class=Grid2x2ObservationFunction,
        add_system_info=False,
    )

In [4]:
from ray.rllib.algorithms.ppo import PPOConfig

# From https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/ppo/atari-ppo.yaml

# Instantiate one environment up front solely to read its `ts_ids`
# (presumably the traffic-signal ids); each id gets its own policy below.
train_env = MultiAgentEnvCompatibility(train_env_creator({}))
policy_ids = set(train_env.env.ts_ids)

# Sampling: 10 workers x 100-step fragments.
rollout_settings = {
    "rollout_fragment_length": 100,
    "num_rollout_workers": 10,
}

# PPO optimisation hyperparameters.
training_settings = {
    "lambda_": 0.95,
    "kl_coeff": 0.5,
    "clip_param": 0.1,
    "vf_clip_param": 10.0,
    "entropy_coeff": 0.01,
    "train_batch_size": 1000,
    "sgd_minibatch_size": 100,
    "num_sgd_iter": 10,
}

# Evaluation: a single evaluation unit on one dedicated worker.
evaluation_settings = {
    "evaluation_duration": 1,
    "evaluation_num_workers": 1,
    "evaluation_sample_timeout_s": 300,
}

config: PPOConfig = (
    PPOConfig()
    .environment(env=ENV_NAME)
    .framework(framework="torch")
    .rollouts(**rollout_settings)
    .training(**training_settings)
    .evaluation(**evaluation_settings)
    .debugging(seed=SEED)
    # GPU count can be overridden through the RLLIB_NUM_GPUS environment variable.
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "1")))
    .multi_agent(
        policies=policy_ids,
        # Every agent is driven by the policy that carries its own id.
        policy_mapping_fn=(lambda agent_id, *args, **kwargs: agent_id),
    )
    .fault_tolerance(recreate_failed_workers=True)
)

delta_wait_time_reward 0.85


In [5]:
# Directory that receives this run's evaluation CSV files.
csv_dir = os.path.join("outputs", ENV_NAME, f"test_{TEST_NUM}")
# exist_ok=True makes the cell safe to re-run and avoids a check-then-create race.
os.makedirs(csv_dir, exist_ok=True)

## Play Untrained Agent

In [6]:
def eval_env_creator(csv_path, tb_log_dir):
    """Create the SUMO multi-agent environment in evaluation mode.

    Args:
        csv_path: Path forwarded to the environment as ``csv_path``.
        tb_log_dir: Directory forwarded to the environment as ``tb_log_dir``.

    Returns:
        A ``MultiAgentSumoEnv`` built with ``eval=True`` and otherwise the same
        network, duration, reward, seed and observation settings as training.
    """
    scenario_dir = os.path.join("nets", ENV_NAME)

    # Report the first two default arguments of combined_reward: the
    # congestion reward function and its coefficient (alpha).
    reward_defaults = combined_reward.__defaults__
    print(reward_defaults[0].__name__, reward_defaults[1])

    return MultiAgentSumoEnv(
        eval=True,
        csv_path=csv_path,
        tb_log_dir=tb_log_dir,
        net_file=os.path.join(scenario_dir, f"{ENV_NAME}.net.xml"),
        route_file=os.path.join(scenario_dir, f"{ENV_NAME}.rou.xml"),
        num_seconds=3600,
        reward_fn=combined_reward,
        sumo_seed=SEED,
        observation_class=Grid2x2ObservationFunction,
        add_system_info=False,
    )

In [7]:
ray.init()

csv_path = os.path.join(csv_dir, "untrained.csv")
tb_log_dir = os.path.join("logs", ENV_NAME, f"PPO_{TEST_NUM}", "eval_untrained")

# Write the header only when the file is new or empty. The file is opened in
# append mode, so re-running this cell previously inserted a second header row
# in the middle of the data.
needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
with open(csv_path, "a", newline="") as f:
    csv_writer = csv.writer(f)
    if needs_header:
        # Column names for the metrics file; the data rows are presumably
        # appended by the evaluation environment (confirm in MultiAgentSumoEnv).
        csv_writer.writerow(["sim_time", "arrived_num", "sys_tyre_pm", "sys_stopped",
                             "sys_total_wait", "sys_avg_wait", "sys_avg_speed",
                             "agents_tyre_pm", "agents_stopped", "agents_total_wait",
                             "agents_avg_speed", "agents_total_pressure"])

# Bind ENV_NAME to the evaluation environment for the untrained baseline run.
register_env(ENV_NAME, lambda config: MultiAgentEnvCompatibility(eval_env_creator(csv_path, tb_log_dir)))

2023-06-05 01:19:38,558	INFO worker.py:1625 -- Started a local Ray instance.


In [8]:
# Build the PPO algorithm from `config`; ENV_NAME currently resolves to the evaluation environment registered in the previous cell.
algo = config.build()

2023-06-05 01:19:42,124	INFO algorithm.py:527 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


[2m[36m(RolloutWorker pid=29500)[0m delta_wait_time_reward 0.85
[2m[36m(RolloutWorker pid=1940)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 9ms, vehicles TOT 0 ACT 0 BUF 0)                      




[2m[36m(RolloutWorker pid=7384)[0m delta_wait_time_reward 0.85[32m [repeated 10x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(RolloutWorker pid=25552)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 0 ACT 0 BUF 0)                      [32m [repeated 9x across cluster][0m


2023-06-05 01:19:56,586	INFO trainable.py:172 -- Trainable.setup took 14.466 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [9]:
# Baseline: run the evaluation (evaluation_duration=1 in the config) with the still-untrained policies.
algo.evaluate()

Step #3600.00 (1ms ~= 1000.00*RT, ~231000.00UPS, TraCI: 251ms, vehicles TOT 1715 ACT 231 BT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~231000.00UPS, TraCI: 251ms, vehicles TOT 1715 ACT 231 BT 3 ACT 3 BUF 0)                      


{'evaluation': {'episode_reward_max': -315584.44942888705,
  'episode_reward_min': -315584.44942888705,
  'episode_reward_mean': -315584.44942888705,
  'episode_len_mean': 720.0,
  'episode_media': {},
  'episodes_this_iter': 1,
  'policy_reward_min': {'1': -75579.12950596321,
   '2': -82284.05196430066,
   '5': -78189.73550824964,
   '6': -79531.53245037395},
  'policy_reward_max': {'1': -75579.12950596321,
   '2': -82284.05196430066,
   '5': -78189.73550824964,
   '6': -79531.53245037395},
  'policy_reward_mean': {'1': -75579.12950596321,
   '2': -82284.05196430066,
   '5': -78189.73550824964,
   '6': -79531.53245037395},
  'custom_metrics': {},
  'hist_stats': {'episode_reward': [-315584.44942888705],
   'episode_lengths': [720],
   'policy_1_reward': [-75579.12950596321],
   'policy_2_reward': [-82284.05196430066],
   'policy_5_reward': [-78189.73550824964],
   'policy_6_reward': [-79531.53245037395]},
  'sampler_perf': {'mean_raw_obs_processing_ms': 4.280402491724276,
   'mean_inf

In [10]:
# Stop the local Ray instance; the training section starts a fresh one with the training environment registered.
ray.shutdown()

## Train RL Agent

In [11]:
# Fresh Ray session for training.
ray.init()

# Re-bind ENV_NAME so workers now build the training environment; the creator
# receives the env config that RLlib passes in.
register_env(
    ENV_NAME,
    lambda env_config: MultiAgentEnvCompatibility(train_env_creator(env_config)),
)

2023-06-05 01:24:03,455	INFO worker.py:1625 -- Started a local Ray instance.


In [12]:
# Rebuild the algorithm in the new Ray session; ENV_NAME now resolves to the training environment.
algo = config.build()



[2m[36m(RolloutWorker pid=19592)[0m delta_wait_time_reward 0.85




[2m[36m(RolloutWorker pid=19936)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 0 ACT 0 BUF 0)                      


2023-06-05 01:24:17,616	INFO trainable.py:172 -- Trainable.setup took 10.848 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [13]:
from datetime import datetime

TRAIN_EPS = 1400  # number of algo.train() iterations; 720 * 1400 == 1_008_000 total timesteps
# NOTE(review): the timestep figure above assumes one 720-step episode per
# iteration, but the config sets train_batch_size=1000, which suggests roughly
# 1000 sampled steps per iteration -- confirm which total is intended.
CHECKPOINT_FREQ = 100
# The final iteration must coincide with a checkpoint, because the evaluation
# section loads checkpoint_{TRAIN_EPS:06}.
assert TRAIN_EPS % CHECKPOINT_FREQ == 0

# Use ENV_NAME (not a hard-coded "grid2x2") so checkpoints are written to the
# same directory that the start-up guard checks and the evaluation cell loads.
checkpoint_dir = os.path.join("ray_checkpoints", ENV_NAME, f"test_{TEST_NUM}")

tic = datetime.now()

for i in range(TRAIN_EPS):
    results = algo.train()

    # Save a checkpoint every CHECKPOINT_FREQ iterations (1-based count).
    if (i + 1) % CHECKPOINT_FREQ == 0:
        algo.save(checkpoint_dir)

toc = datetime.now()



Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 86ms, vehicles TOT 2097 ACT 87 BUF 9)               OT 3 ACT 3 BUF 0)                      
[2m[36m(RolloutWorker pid=19204)[0m delta_wait_time_reward 0.85[32m [repeated 10x across cluster][0m
[2m[36m(RolloutWorker pid=19204)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 7ms, vehicles TOT 0 ACT 0 BUF 0)                      [32m [repeated 10x across cluster][0m
Step #3600.00 (1ms ~= 1000.00*RT, ~130000.00UPS, TraCI: 99ms, vehicles TOT 2077 ACT 130 BUT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~142000.00UPS, TraCI: 101ms, vehicles TOT 2085 ACT 142 BOT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~204000.00UPS, TraCI: 122ms, vehicles TOT 1543 ACT 204 BOT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~205000.00UPS, TraCI: 123ms, vehicles TOT 1750 ACT 205 B28ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~217000.00UPS, TraCI: 104ms, vehicles TOT 1

[2m[36m(RolloutWorker pid=3172)[0m Error: tcpip::Socket::accept() Unable to create listening socket: Address already in use
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=19204)[0m Quitting (on error).


Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 88ms, vehicles TOT 2106 ACT 51 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 93ms, vehicles TOT 2106 ACT 49 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~49000.00UPS, TraCI: 83ms, vehicles TOT 2106 ACT 49 BUF 8ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~50000.00UPS, TraCI: 85ms, vehicles TOT 2106 ACT 50 BUF 22ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~53000.00UPS, TraCI: 70ms, vehicles TOT 2106 ACT 53 BUF 16ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~58000.00UPS, TraCI: 62ms, vehicles TOT 2106 ACT 58 BUF OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 65ms, vehicles TOT 2106 ACT 45 BUF 0)               T 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 73ms, vehicles TOT 2106 ACT 46 BUF 0)               OT

[2m[36m(RolloutWorker pid=15836)[0m 2023-06-05 03:33:27,843	ERROR actor_manager.py:179 -- Worker exception, recreating: unpack requires a buffer of 3 bytes
[2m[36m(RolloutWorker pid=15836)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=15836)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\utils\actor_manager.py", line 176, in apply
[2m[36m(RolloutWorker pid=15836)[0m     return func(self, *args, **kwargs)
[2m[36m(RolloutWorker pid=15836)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 86, in <lambda>
[2m[36m(RolloutWorker pid=15836)[0m     lambda w: w.sample(), local_worker=False, healthy_only=True
[2m[36m(RolloutWorker pid=15836)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
[2m[36m(RolloutWorker pid=15836)[0m     return method(self, *_args, **_kwargs)
[2m[36m(RolloutWorker pid=1

[2m[36m(RolloutWorker pid=8044)[0m delta_wait_time_reward 0.85
[2m[36m(RolloutWorker pid=8044)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 9ms, vehicles TOT 0 ACT 0 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 57ms, vehicles TOT 2106 ACT 47 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 57ms, vehicles TOT 2106 ACT 47 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 57ms, vehicles TOT 2106 ACT 47 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 73ms, vehicles TOT 2106 ACT 46 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 77ms, vehicles TOT 2106 ACT 42 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 73ms, vehicles TOT 2106 ACT 48 BUF 0)               9ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.

[2m[36m(RolloutWorker pid=18280)[0m Error: tcpip::Socket::accept() Unable to create listening socket: Address already in use
[2m[36m(RolloutWorker pid=18280)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m     sys.exit(1)[32m [repeated 3x across cluster][0m
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m   File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task[32m [repeated 4x across cluster][0m
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m Quitting (on error).
[2m[36m(RolloutWorker pid=3172)[0m Traceback (most recent call last):[32m [repeated 2x across clu

Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 81ms, vehicles TOT 2106 ACT 43 BUF 0)               T 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 79ms, vehicles TOT 2106 ACT 47 BUF 0)               25ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~38000.00UPS, TraCI: 88ms, vehicles TOT 2106 ACT 38 BUF T 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 88ms, vehicles TOT 2106 ACT 44 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 85ms, vehicles TOT 2106 ACT 43 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~39000.00UPS, TraCI: 85ms, vehicles TOT 2106 ACT 39 BUF OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 72ms, vehicles TOT 2106 ACT 49 BUF 0)               18ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~42000.00UPS, TraCI: 88ms, vehicles TOT 2106 ACT 42 BUF OT

[2m[36m(RolloutWorker pid=25380)[0m 2023-06-05 04:29:14,342	ERROR actor_manager.py:179 -- Worker exception, recreating: unpack requires a buffer of 3 bytes
[2m[36m(RolloutWorker pid=25380)[0m     return func(self, *args, **kwargs)
[2m[36m(RolloutWorker pid=25380)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 86, in <lambda>
[2m[36m(RolloutWorker pid=25380)[0m     lambda w: w.sample(), local_worker=False, healthy_only=True
[2m[36m(RolloutWorker pid=25380)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\evaluation\rollout_worker.py", line 915, in sample
[2m[36m(RolloutWorker pid=25380)[0m     batches = [self.input_reader.next()]
[2m[36m(RolloutWorker pid=25380)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\evaluation\sampler.py", line 92, in next
[2m[36m(RolloutWorker pid=25380)[0m     batches = [self.get_data()]
[2m[36m(RolloutWorker pid=25380)[0m 

[2m[36m(RolloutWorker pid=19384)[0m delta_wait_time_reward 0.85
[2m[36m(RolloutWorker pid=19384)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 9ms, vehicles TOT 0 ACT 0 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~48000.00UPS, TraCI: 82ms, vehicles TOT 2106 ACT 48 BUF T 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~48000.00UPS, TraCI: 79ms, vehicles TOT 2106 ACT 48 BUF OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 73ms, vehicles TOT 2106 ACT 46 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 69ms, vehicles TOT 2106 ACT 47 BUF 0)               8ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~43000.00UPS, TraCI: 71ms, vehicles TOT 2106 ACT 43 BUF 20ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 71ms, vehicles TOT 2106 ACT 51 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?

[2m[36m(RolloutWorker pid=31196)[0m Error: tcpip::Socket::accept() Unable to create listening socket: Address already in use
[2m[36m(RolloutWorker pid=31196)[0m Quitting (on error).


[2m[36m(RolloutWorker pid=8564)[0m  Retrying in 1 seconds
[2m[36m(RolloutWorker pid=8564)[0m Could not connect to TraCI server at localhost:54021 [WinError 10061] No connection could be made because the target machine actively refused it
[2m[36m(RolloutWorker pid=8564)[0m  Retrying in 1 seconds
[2m[36m(RolloutWorker pid=8564)[0m Could not connect to TraCI server at localhost:54021 [WinError 10061] No connection could be made because the target machine actively refused it
[2m[36m(RolloutWorker pid=8564)[0m  Retrying in 1 seconds
[2m[36m(RolloutWorker pid=8564)[0m Could not connect to TraCI server at localhost:54021 [WinError 10061] No connection could be made because the target machine actively refused it
[2m[36m(RolloutWorker pid=8564)[0m  Retrying in 1 seconds
[2m[36m(RolloutWorker pid=8564)[0m Could not connect to TraCI server at localhost:54021 [WinError 10061] No connection could be made because the target machine actively refused it
[2m[36m(RolloutWorker 

[2m[36m(RolloutWorker pid=8564)[0m 2023-06-05 05:51:17,269	ERROR actor_manager.py:179 -- Worker exception, recreating: Could not connect in 61 tries
[2m[36m(RolloutWorker pid=8564)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=8564)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\utils\actor_manager.py", line 176, in apply
[2m[36m(RolloutWorker pid=8564)[0m     return func(self, *args, **kwargs)
[2m[36m(RolloutWorker pid=8564)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 86, in <lambda>
[2m[36m(RolloutWorker pid=8564)[0m     lambda w: w.sample(), local_worker=False, healthy_only=True
[2m[36m(RolloutWorker pid=8564)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
[2m[36m(RolloutWorker pid=8564)[0m     return method(self, *_args, **_kwargs)
[2m[36m(RolloutWorker pid=8564)[0m   Fil

[2m[36m(RolloutWorker pid=8564)[0m Could not connect to TraCI server at localhost:54021 [WinError 10061] No connection could be made because the target machine actively refused it


[2m[36m(RolloutWorker pid=8564)[0m 2023-06-05 05:52:17,275	ERROR worker.py:844 -- Worker exits with an exit code 1.
[2m[36m(RolloutWorker pid=8564)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=8564)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\utils\actor_manager.py", line 176, in apply
[2m[36m(RolloutWorker pid=8564)[0m     return func(self, *args, **kwargs)
[2m[36m(RolloutWorker pid=8564)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 86, in <lambda>
[2m[36m(RolloutWorker pid=8564)[0m     lambda w: w.sample(), local_worker=False, healthy_only=True
[2m[36m(RolloutWorker pid=8564)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
[2m[36m(RolloutWorker pid=8564)[0m     return method(self, *_args, **_kwargs)
[2m[36m(RolloutWorker pid=8564)[0m   File "c:\Users\admin\anaconda3\envs\

[2m[36m(RolloutWorker pid=23192)[0m delta_wait_time_reward 0.85
[2m[36m(RolloutWorker pid=23192)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 10ms, vehicles TOT 0 ACT 0 BUF 0)                     
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 72ms, vehicles TOT 2106 ACT 43 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 68ms, vehicles TOT 2106 ACT 40 BUF 0)               T 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~42000.00UPS, TraCI: 69ms, vehicles TOT 2106 ACT 42 BUF 25ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (1ms ~= 1000.00*RT, ~47000.00UPS, TraCI: 87ms, vehicles TOT 2106 ACT 47 BUF T 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 80ms, vehicles TOT 2106 ACT 41 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 80ms, vehicles TOT 2106 ACT 43 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000

[2m[36m(RolloutWorker pid=29588)[0m 2023-06-05 06:06:49,937	ERROR actor_manager.py:179 -- Worker exception, recreating: unpack requires a buffer of 3 bytes
[2m[36m(RolloutWorker pid=29588)[0m Traceback (most recent call last):
[2m[36m(RolloutWorker pid=29588)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\utils\actor_manager.py", line 176, in apply
[2m[36m(RolloutWorker pid=29588)[0m     return func(self, *args, **kwargs)
[2m[36m(RolloutWorker pid=29588)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\rllib\execution\rollout_ops.py", line 86, in <lambda>
[2m[36m(RolloutWorker pid=29588)[0m     lambda w: w.sample(), local_worker=False, healthy_only=True
[2m[36m(RolloutWorker pid=29588)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_span
[2m[36m(RolloutWorker pid=29588)[0m     return method(self, *_args, **_kwargs)
[2m[36m(RolloutWorker pid=2

[2m[36m(RolloutWorker pid=424)[0m delta_wait_time_reward 0.85
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 75ms, vehicles TOT 2106 ACT 41 BUF 0)               24ms, vehicles TOT 3 ACT 3 BUF 0)      
[2m[36m(RolloutWorker pid=424)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 9ms, vehicles TOT 0 ACT 0 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 80ms, vehicles TOT 2106 ACT 50 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 69ms, vehicles TOT 2106 ACT 43 BUF 0)               T 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~43000.00UPS, TraCI: 65ms, vehicles TOT 2106 ACT 43 BUF 15ms, vehicles TOT 3 ACT 3 BUF 0)      
Step #3600.00 (0ms ?*RT. ?UPS, TraCI: 79ms, vehicles TOT 2106 ACT 40 BUF 0)               OT 3 ACT 3 BUF 0)                      
Step #3600.00 (1ms ~= 1000.00*RT, ~45000.00UPS, TraCI: 74ms, vehicles TOT 2106 ACT 45 BUF OT 3 ACT 3 BUF 0)                      
Step #3600.00 (0ms ?*RT. ?UPS

In [14]:
# Wall-clock duration of the training loop, rendered as H:MM:SS.ffffff.
str(toc - tic)

'5:28:58.090030'

In [15]:
# Stop the training Ray instance before the trained-agent evaluation starts its own.
ray.shutdown()

## Play Trained Agent

In [None]:
csv_path = os.path.join(csv_dir, "trained.csv")
tb_log_dir = os.path.join("logs", ENV_NAME, f"PPO_{TEST_NUM}", "eval_trained")

# Write the header only when the file is new or empty. The file is opened in
# append mode, so re-running this cell previously inserted a second header row
# in the middle of the data.
needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
with open(csv_path, "a", newline="") as f:
    csv_writer = csv.writer(f)
    if needs_header:
        # Column names for the metrics file; the data rows are presumably
        # appended by the evaluation environment (confirm in MultiAgentSumoEnv).
        csv_writer.writerow(["sim_time", "arrived_num", "sys_tyre_pm", "sys_stopped",
                             "sys_total_wait", "sys_avg_wait", "sys_avg_speed",
                             "agents_tyre_pm", "agents_stopped", "agents_total_wait",
                             "agents_avg_speed", "agents_total_pressure"])

In [16]:
# Re-bind ENV_NAME to the evaluation environment, now writing to the current
# csv_path / tb_log_dir; the env config passed in by RLlib is ignored.
register_env(
    ENV_NAME,
    lambda _env_config: MultiAgentEnvCompatibility(eval_env_creator(csv_path, tb_log_dir)),
)

In [17]:
# Start a new local Ray instance for evaluating the trained policies.
ray.init()

2023-06-05 06:53:20,405	INFO worker.py:1625 -- Started a local Ray instance.


0,1
Python version:,3.8.16
Ray version:,2.4.0


In [18]:
from ray.rllib.algorithms.ppo import PPO

# Absolute path of the final checkpoint: the directory is named after the last
# training iteration (TRAIN_EPS), zero-padded to six digits.
checkpoint_path = os.path.abspath(
    os.path.join(
        "ray_checkpoints",
        ENV_NAME,
        f"test_{TEST_NUM}",
        f"checkpoint_{TRAIN_EPS:06}",
    )
)
print(checkpoint_path)

# Restore the PPO algorithm, including its policies, from that checkpoint.
ppo_agent = PPO.from_checkpoint(checkpoint_path)



c:\Users\admin\masters-proj\ray_checkpoints\grid2x2\test_7\checkpoint_001400
[2m[36m(RolloutWorker pid=21568)[0m delta_wait_time_reward 0.85
[2m[36m(RolloutWorker pid=21568)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 9ms, vehicles TOT 0 ACT 0 BUF 0)                      


2023-06-05 06:53:35,251	INFO trainable.py:172 -- Trainable.setup took 11.226 seconds. If your trainable is slow to initialize, consider setting reuse_actors=True to reduce actor creation overheads.


In [20]:
# Run the evaluation with the policies restored from the checkpoint, for comparison with the untrained baseline.
ppo_agent.evaluate()

Step #3600.00 (1ms ~= 1000.00*RT, ~45000.00UPS, TraCI: 163ms, vehicles TOT 2106 ACT 45 BUFOT 3 ACT 3 BUF 0)                      
[2m[36m(RolloutWorker pid=29284)[0m delta_wait_time_reward 0.85[32m [repeated 10x across cluster][0m
[2m[36m(RolloutWorker pid=29284)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 6ms, vehicles TOT 0 ACT 0 BUF 0)                      [32m [repeated 10x across cluster][0m


{'evaluation': {'episode_reward_max': -76934.74607057456,
  'episode_reward_min': -76934.74607057456,
  'episode_reward_mean': -76934.74607057456,
  'episode_len_mean': 720.0,
  'episode_media': {},
  'episodes_this_iter': 1,
  'policy_reward_min': {'1': -19229.96091845558,
   '2': -18779.31926716052,
   '5': -19191.235200160903,
   '6': -19734.23068479743},
  'policy_reward_max': {'1': -19229.96091845558,
   '2': -18779.31926716052,
   '5': -19191.235200160903,
   '6': -19734.23068479743},
  'policy_reward_mean': {'1': -19229.96091845558,
   '2': -18779.31926716052,
   '5': -19191.235200160903,
   '6': -19734.23068479743},
  'custom_metrics': {},
  'hist_stats': {'episode_reward': [-76934.74607057456],
   'episode_lengths': [720],
   'policy_1_reward': [-19229.96091845558],
   'policy_2_reward': [-18779.31926716052],
   'policy_5_reward': [-19191.235200160903],
   'policy_6_reward': [-19734.23068479743]},
  'sampler_perf': {'mean_raw_obs_processing_ms': 4.243369241362637,
   'mean_inf

In [21]:
# Release Ray resources now that the trained-agent evaluation is finished.
ray.shutdown()

