In [1]:
import os

import ray
import supersuit as ss
from ray import tune
from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.env.wrappers.pettingzoo_env import ParallelPettingZooEnv  # RLlib-PZ interface
from ray.tune.registry import register_env

In [2]:
import sumo_rl
from reward_functions import diff_wait_time

def env_creator(args):
    """Create the SUMO-RL RESCO grid4x4 environment through ``sumo_rl.parallel_env``.

    Args:
        args: Mapping of keyword overrides forwarded to ``sumo_rl.parallel_env``
            (the env config handed to the registered creator). Keys given here
            replace the defaults below; an empty mapping or ``None`` keeps the
            defaults unchanged.

    Returns:
        Whatever ``sumo_rl.parallel_env`` returns for the merged parameters.
    """
    env_params = {
        "net_file": os.path.join("nets", "RESCO", "grid4x4", "grid4x4.net.xml"),
        "route_file": os.path.join("nets", "RESCO", "grid4x4", "grid4x4_1.rou.xml"),
        "num_seconds": 3600,
        "reward_fn": diff_wait_time,
        "sumo_seed": 42,
    }
    # The creator argument used to be ignored, so settings passed through the
    # env config never reached SUMO. Apply them on top of the defaults.
    if args:
        env_params.update(args)
    return sumo_rl.parallel_env(**env_params)

In [3]:
# Start the local Ray runtime. ignore_reinit_error=True keeps this cell
# re-runnable: without it, executing the cell again on a live kernel raises
# because Ray is already initialised.
ray.init(ignore_reinit_error=True)

# Name under which the environment is registered and later referenced by the
# algorithm config.
env_name = "grid4x4"

# Register a creator that wraps the env built by env_creator in
# ParallelPettingZooEnv (the RLlib <-> PettingZoo parallel-API adapter).
register_env(env_name, lambda config: ParallelPettingZooEnv(env_creator(config)))

2023-05-02 11:33:57,679	INFO worker.py:1625 -- Started a local Ray instance.


In [17]:
# PPO configuration for the registered environment, assembled one builder
# step at a time; each step's return value is carried forward as `config`.
config = PPOConfig()
config = config.environment(env=env_name)
# No remote rollout workers; sample batches are made of complete episodes.
config = config.rollouts(num_rollout_workers=0, batch_mode="complete_episodes")
config = config.framework(framework="torch")
# PPO optimisation hyperparameters.
config = config.training(
    gamma=0.99,
    lr=2.5e-4,
    lambda_=0.95,
    sgd_minibatch_size=256,
    num_sgd_iter=4,
    vf_loss_coeff=1.0,
    entropy_coeff=1e-3,
    clip_param=0.1,
    grad_clip=0.5,
)
config = config.debugging(log_level="INFO")
# GPU count is read from the RLLIB_NUM_GPUS environment variable (0 if unset).
config = config.resources(num_gpus=int(os.environ.get("RLLIB_NUM_GPUS", "0")))

In [18]:
# Build the algorithm object from the PPO config defined in the previous cell.
algo = config.build()

2023-05-02 12:07:44,035	INFO policy.py:1285 -- Policy (worker=local) running on CPU.
2023-05-02 12:07:44,036	INFO torch_policy_v2.py:110 -- Found 1 visible cuda devices.
2023-05-02 12:07:44,041	INFO util.py:118 -- Using connectors:
2023-05-02 12:07:44,042	INFO util.py:119 --     AgentConnectorPipeline
        ObsPreprocessorConnector
        StateBufferConnector
        ViewRequirementAgentConnector
2023-05-02 12:07:44,042	INFO util.py:120 --     ActionConnectorPipeline
        ConvertToNumpyConnector
        NormalizeActionsConnector
        ImmutableActionsConnector
2023-05-02 12:07:44,043	INFO rollout_worker.py:2000 -- Built policy map: <PolicyMap lru-caching-capacity=100 policy-IDs=['default_policy']>
2023-05-02 12:07:44,043	INFO rollout_worker.py:2001 -- Built preprocessor map: {'default_policy': None}
2023-05-02 12:07:44,044	INFO rollout_worker.py:761 -- Built filter map: defaultdict(<class 'ray.rllib.utils.filter.NoFilter'>, {})


In [19]:
# pretty_print formats the result returned by train() for display.
from ray.tune.logger import pretty_print

# Run one call to algo.train() and print its formatted result.
result = algo.train()
print(pretty_print(result))

2023-05-02 12:08:00,843	INFO rollout_worker.py:909 -- Generating sample batch of size 4000
Exception ignored in: <function SumoEnvironment.__del__ at 0x000001BA2C127250>
Traceback (most recent call last):
  File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\sumo_rl\environment\env.py", line 446, in __del__
    self.close()
  File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\sumo_rl\environment\env.py", line 436, in close
    traci.close()
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\main.py", line 263, in close
    _connections[""].close(wait)
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\connection.py", line 355, in close
    self._sendCmd(tc.CMD_CLOSE, None, None)
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\connection.py", line 189, in _sendCmd
    return self._sendExact()
  File "C:\Program Files (x86)\Eclipse\Sumo\tools\traci\connection.py", line 106, in _sendExact
    raise TraCIException(err, prefix[1], _RESULTS[prefix[2]])
KeyErr

agent_timesteps_total: 69120
connector_metrics:
  ObsPreprocessorConnector_ms: 0.06513198216756184
  StateBufferConnector_ms: 0.0
  ViewRequirementAgentConnector_ms: 0.6788134574890137
counters:
  num_agent_steps_sampled: 69120
  num_agent_steps_trained: 69120
  num_env_steps_sampled: 4320
  num_env_steps_trained: 4320
custom_metrics: {}
date: 2023-05-02_12-15-25
done: false
episode_len_mean: 720.0
episode_media: {}
episode_reward_max: -246.0
episode_reward_mean: -482.1666666666667
episode_reward_min: -732.0
episodes_this_iter: 6
episodes_total: 6
hostname: JM-M16
info:
  learner:
    default_policy:
      custom_metrics: {}
      diff_num_grad_updates_vs_sampler_policy: 539.5
      learner_stats:
        allreduce_latency: 0.0
        cur_kl_coeff: 0.19999999999999998
        cur_lr: 0.00025
        entropy: 2.0776924362889044
        entropy_coeff: 0.001
        grad_gnorm: 0.426115154116242
        kl: 0.0017535451632545152
        policy_loss: -0.002954182497988869
        total_lo

In [None]:
# Call the algorithm's evaluate(); as the cell's last expression, its return
# value is shown as the cell output.
algo.evaluate()

In [None]:
# tune.run(
#     "PPO",
#     name="PPO",
#     stop={"timesteps_total": 500},
#     checkpoint_freq=10,
#     local_dir="~/ray_results/" + env_name,
#     config=config.to_dict(),
# )

[2m[36m(RolloutWorker pid=11780)[0m   File "python\ray\_raylet.pyx", line 881, in ray._raylet.execute_task[32m [repeated 8x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m
[2m[36m(RolloutWorker pid=11780)[0m   File "python\ray\_raylet.pyx", line 821, in ray._raylet.execute_task.function_executor[32m [repeated 4x across cluster][0m
[2m[36m(RolloutWorker pid=11780)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\_private\function_manager.py", line 670, in actor_method_executor[32m [repeated 4x across cluster][0m
[2m[36m(RolloutWorker pid=11780)[0m     return method(__ray_actor, *args, **kwargs)[32m [repeated 4x across cluster][0m
[2m[36m(RolloutWorker pid=11780)[0m   File "c:\Users\admin\anaconda3\envs\marl\lib\site-packages\ray\util\tracing\tracing_helper.py", line 460, in _resume_

TuneError: ('Trials did not complete', [PPO_pistonball_v6_b5f2c_00000])

In [None]:
# Shut down the Ray runtime that was started with ray.init() earlier in the notebook.
ray.shutdown()