# Grid4x4 - PettingZoo + RLlib

In [1]:
import os

import ray
from ray import tune
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv  # RLlib-PZ interface
from ray.tune.registry import register_env
import supersuit as ss

from helper_functions import make_parallel_env

In [2]:
import random
import numpy as np
import torch

# Single seed shared by Python, NumPy, PyTorch and (via `sumo_seed`) the SUMO simulation.
SEED = 23423  # default SUMO seed no.
random.seed(SEED)
np.random.seed(SEED)
# Trailing `;` keeps the notebook from echoing the returned torch Generator object.
torch.manual_seed(SEED);

<torch._C.Generator at 0x1aac302ef70>

In [3]:
from observation import Grid4x4ObservationFunction
from reward_functions import combined_reward

def env_creator(args):
    """Build the Grid4x4 SUMO parallel environment with SuperSuit preprocessing.

    Args:
        args: Value forwarded by the caller (the registered creator lambda passes
            its config here). Currently unused; every setting is fixed below.

    Returns:
        The environment produced by ``make_parallel_env``, wrapped first with
        ``ss.pad_observations_v0`` and then with ``ss.frame_stack_v1`` (3 frames).
    """
    scenario_dir = os.path.join("nets", "grid4x4")

    base_env = make_parallel_env(
        net_file=os.path.join(scenario_dir, "grid4x4.net.xml"),
        route_file=os.path.join(scenario_dir, "grid4x4_1.rou.xml"),
        num_seconds=3600,
        reward_fn=combined_reward,
        sumo_seed=SEED,
        observation_class=Grid4x4ObservationFunction,
    )

    # Pad observations before stacking so every agent ends up with the same space.
    padded_env = ss.pad_observations_v0(base_env)
    return ss.frame_stack_v1(padded_env, 3)

In [11]:
# Start the local Ray runtime. `ignore_reinit_error=True` makes this cell safe to re-run in a
# live kernel: a second plain `ray.init()` raises because Ray is already initialised.
ray.init(ignore_reinit_error=True)

env_name = "grid4x4"

# Register the environment under `env_name` so the RLlib config can refer to it by string.
# The creator wraps the PettingZoo parallel env in RLlib's multi-agent adapter.
register_env(env_name, lambda config: ParallelPettingZooEnv(env_creator(config)))

2023-05-27 00:08:05,222	INFO worker.py:1625 -- Started a local Ray instance.


In [12]:
from ray.rllib.algorithms.ppo import PPOConfig

# From https://github.com/ray-project/ray/blob/master/rllib/tuned_examples/ppo/atari-ppo.yaml

# Only ask for a GPU by default when PyTorch can actually see one; requesting a GPU on a
# CPU-only machine makes `config.build()` fail. Setting RLLIB_NUM_GPUS still overrides this.
DEFAULT_NUM_GPUS = "1" if torch.cuda.is_available() else "0"

config = (
    PPOConfig()
    .environment(env=env_name)
    .framework(framework="torch")
    .rollouts(
        # 10 workers x 5 envs x 100-step fragments = 5000 steps, matching train_batch_size.
        rollout_fragment_length=100,
        num_rollout_workers=10,
        num_envs_per_worker=5,
        batch_mode="truncate_episodes",
    )
    .training(
        lambda_=0.95,
        kl_coeff=0.5,
        clip_param=0.1,
        vf_clip_param=10.0,
        entropy_coeff=0.01,
        train_batch_size=5000,
        sgd_minibatch_size=500,
        num_sgd_iter=10,
    )
    .debugging(log_level="INFO")
    .resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", DEFAULT_NUM_GPUS)))
)

In [9]:
# Instantiate the PPO algorithm from the `config` object.
# NOTE(review): relies on the Ray-init / `register_env` cell and the config cell having run
# first in this kernel — confirm the order holds under "Restart Kernel -> Run All".
algo = config.build()

[2m[36m(RolloutWorker pid=25312)[0m Step #0.00 (0ms ?*RT. ?UPS, TraCI: 57ms, vehicles TOT 0 ACT 0 BUF 0)                     [32m [repeated 10x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(RolloutWorker pid=34260)[0m Step #0.00[32m [repeated 9x across cluster][0m


[2m[36m(RolloutWorker pid=25312)[0m 2023-05-26 23:53:59,268	ERROR worker.py:844 -- Exception raised in creation task: The actor died because of an error raised in its creation task, [36mray::RolloutWorker.__init__()[39m (pid=25312, ip=127.0.0.1, repr=<ray.rllib.evaluation.rollout_worker.RolloutWorker object at 0x00000259092579A0>)[32m [repeated 10x across cluster][0m
[2m[36m(RolloutWorker pid=25312)[0m   File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task[32m [repeated 20x across cluster][0m
[2m[36m(RolloutWorker pid=25312)[0m   File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor[32m [repeated 10x across cluster][0m
[2m[36m(RolloutWorker pid=25312)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor[32m [repeated 10x across cluster][0m
[2m[36m(RolloutWorker pid=25312)[0m     return method(__ray_actor, *args, **kwargs)[32m [repea

AssertionError: Observation spaces for all agents must be identical. Perhaps SuperSuit's pad_observations wrapper can help (useage: `supersuit.aec_wrappers.pad_observations(env)`

In [None]:
from ray.tune.logger import pretty_print

# Run one call to `algo.train()` and print the returned metrics dict in readable form.
result = algo.train()
print(pretty_print(result))

2023-05-02 12:08:00,843	INFO rollout_worker.py:909 -- Generating sample batch of size 4000
Exception ignored in: <function SumoEnvironment.__del__ at 0x000001BA2C127250>
Traceback (most recent call last):
  File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\sumo_rl\environment\env.py", line 446, in __del__
    self.close()
  File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\sumo_rl\environment\env.py", line 436, in close
    traci.close()
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\main.py", line 263, in close
    _connections[""].close(wait)
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\connection.py", line 355, in close
    self._sendCmd(tc.CMD_CLOSE, None, None)
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\connection.py", line 189, in _sendCmd
    return self._sendExact()
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\connection.py", line 106, in _sendExact
    raise TraCIException(err, prefix[1], _RESULTS[prefix[2]])
KeyErr

agent_timesteps_total: 69120
connector_metrics:
  ObsPreprocessorConnector_ms: 0.06513198216756184
  StateBufferConnector_ms: 0.0
  ViewRequirementAgentConnector_ms: 0.6788134574890137
counters:
  num_agent_steps_sampled: 69120
  num_agent_steps_trained: 69120
  num_env_steps_sampled: 4320
  num_env_steps_trained: 4320
custom_metrics: {}
date: 2023-05-02_12-15-25
done: false
episode_len_mean: 720.0
episode_media: {}
episode_reward_max: -246.0
episode_reward_mean: -482.1666666666667
episode_reward_min: -732.0
episodes_this_iter: 6
episodes_total: 6
hostname: JM-M16
info:
  learner:
    default_policy:
      custom_metrics: {}
      diff_num_grad_updates_vs_sampler_policy: 539.5
      learner_stats:
        allreduce_latency: 0.0
        cur_kl_coeff: 0.19999999999999998
        cur_lr: 0.00025
        entropy: 2.0776924362889044
        entropy_coeff: 0.001
        grad_gnorm: 0.426115154116242
        kl: 0.0017535451632545152
        policy_loss: -0.002954182497988869
        total_lo

In [None]:
# Evaluate the current policy; the returned metrics are shown as the cell output.
# NOTE(review): the config cell sets no `.evaluation(...)` options — confirm this RLlib
# version can evaluate without dedicated evaluation workers.
algo.evaluate()

In [None]:
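# Alternative (disabled): launch PPO through Ray Tune with this notebook's `config`
# instead of calling `algo.train()` directly.
# tune.run(
#     "PPO",
#     name="PPO",
#     stop={"timesteps_total": 500},
#     checkpoint_freq=10,
#     local_dir="~/ray_results/" + env_name,
#     config=config.to_dict(),
# )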

[2m[36m(RolloutWorker pid=11780)[0m   File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task[32m [repeated 8x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(RolloutWorker pid=11780)[0m   File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor[32m [repeated 4x across cluster][0m
[2m[36m(RolloutWorker pid=11780)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor[32m [repeated 4x across cluster][0m
[2m[36m(RolloutWorker pid=11780)[0m     return method(__ray_actor, *args, **kwargs)[32m [repeated 4x across cluster][0m
[2m[36m(RolloutWorker pid=11780)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_

TuneError: ('Trials did not complete', [PPO_pistonball_v6_b5f2c_00000])

In [None]:
# Shut down the Ray runtime started by `ray.init()` once the notebook is finished.
ray.shutdown()