# RLlib SUMO Test

In [6]:
import argparse
from copy import deepcopy
import logging
import os
import pathlib
from pprint import pformat

import ray
from ray import air, tune

from ray.rllib.algorithms.ppo import PPOConfig
from ray.rllib.algorithms.ppo.ppo_torch_policy import PPOTorchPolicy
from ray.rllib.examples.simulators.sumo import marlenvironment
from ray.rllib.utils.test_utils import check_learning_achieved

In [2]:
# Keep the root logger at WARNING so third-party libraries stay quiet, but
# let this notebook's own logger emit INFO records: the configuration dumps
# further down are written with logger.info() and would otherwise be
# dropped silently because the logger inherits the root level.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger("ppotrain")
logger.setLevel(logging.INFO)

In [3]:
# Start the local Ray runtime. Notebook cells are frequently re-executed;
# ignore_reinit_error=True turns a repeated ray.init() in the same kernel
# into a no-op instead of a RuntimeError.
ray.init(ignore_reinit_error=True)
# Register the SUMO environment creator under the name "sumo_test_env".
tune.register_env("sumo_test_env", marlenvironment.env_creator)

2023-05-01 15:35:08,053	INFO worker.py:1625 -- Started a local Ray instance.


In [13]:
# PPO setup: the policy class and the algorithm configuration object.
policy_class = PPOTorchPolicy

# Keyword arguments for the rollout (sampling) section of the config.
rollout_settings = dict(
    num_rollout_workers=0,
    batch_mode="complete_episodes",
)

# Keyword arguments for the training section of the config.
training_settings = dict(
    gamma=0.99,
    lr=2.5e-4,
    lambda_=0.95,
    sgd_minibatch_size=256,
    num_sgd_iter=4,
    vf_loss_coeff=1.0,
    entropy_coeff=1e-3,
    clip_param=0.1,
    vf_clip_param=None,
    grad_clip=0.5,
)

# Apply each section in turn; every builder call hands back the config.
config = PPOConfig()
config = config.rollouts(**rollout_settings)
config = config.training(**training_settings)
# The GPU count is read from the RLLIB_NUM_GPUS environment variable and
# falls back to 0 when the variable is not set.
gpu_count = int(os.environ.get("RLLIB_NUM_GPUS", "0"))
config = config.resources(num_gpus=gpu_count)
config = config.reporting(min_time_s_per_iteration=5)

In [19]:
# Build the scenario configuration for the RL environment, starting from a
# private copy of the library defaults so the shared default is not mutated.
scenario_config = deepcopy(marlenvironment.DEFAULT_SCENARIO_CONFING)
scenario_config.update({"seed": 42, "log_level": "INFO"})

# Simulator-specific settings live in the nested "sumo_config" mapping.
sumo_settings = scenario_config["sumo_config"]
sumo_settings.update(
    {
        "sumo_connector": "traci",
        "sumo_gui": False,
        "sumo_cfg": os.path.join(
            "nets", "RESCO", "grid4x4", "grid4x4_1.sumocfg"
        ),
        "sumo_params": ["--collision.action", "warn"],
        "trace_file": True,
        "end_of_sim": 3600,  # seconds
        # Number of traci.simulationStep() calls per learning step.
        "update_freq": 5,
        "log_level": "INFO",
    }
)
logger.info("Scenario Configuration: \n %s", pformat(scenario_config))

In [None]:
# Give each agent id its own independent copy of the default agent config.
agent_init = {
    agent_id: deepcopy(marlenvironment.DEFAULT_AGENT_CONFING)
    for agent_id in ("agent_0", "agent_1")
}
logger.info("Agents Configuration: \n %s", pformat(agent_init))

# Bundle the agent and scenario settings, then build the multi-agent env.
env_config = dict(agent_init=agent_init, scenario_config=scenario_config)
marl_env = marlenvironment.SUMOTestMultiAgentEnv(env_config)