Multiagent environments (#818)
* renamed MultiAgentAccelEnv -> AdversarialAccelEnv, along with its dependents

* added MultiAgentAccelPOEnv

* bug fix

* bug fix

* added MultiAgentWaveAttenuationPOEnv

* added MultiAgentMergePOEnv

* added tests

* test to observed

* added reset to wave attenuation env

* test to observed in wave attenuation env

* added figure eight example

* added multiagent merge example

* renamed multiagent_ring -> lord_of_the_rings

* added an additional test

* added multiagent ring example

* test not being hit

* added time debugger

* bug fix to done mask (see the done-mask sketch after this list)

* bug associated with warmup steps
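
For context on the last two fixes: RLlib's multiagent API expects each step to return a done dict keyed by agent id plus a special '__all__' key, and the episode terminates only once '__all__' is true. A minimal sketch of that convention follows (illustrative only; compute_dones and its arguments are hypothetical helpers, not the code patched in this commit):

def compute_dones(rl_ids, crash, steps_after_warmup, horizon):
    """Build an RLlib-style multiagent done mask (hypothetical helper)."""
    # Each agent is marked done when the simulation crashes ...
    done = {veh_id: crash for veh_id in rl_ids}
    # ... and '__all__' ends the episode for every agent at once. Counting
    # steps from the end of the warmup period keeps warmup steps from
    # eating into the horizon.
    done['__all__'] = crash or steps_after_warmup >= horizon
    return done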
AboudyKreidieh committed Feb 19, 2020
1 parent 7dc2096 commit 3e8fc0c
Showing 16 changed files with 1,427 additions and 128 deletions.
127 changes: 127 additions & 0 deletions examples/exp_configs/rl/multiagent/adversarial_figure_eight.py
@@ -0,0 +1,127 @@
"""Example of a multi-agent environment containing a figure eight.
This example consists of one autonomous vehicle and an adversary that is
allowed to perturb the accelerations of figure eight.
"""

# WARNING: Expected total reward is zero as adversary reward is
# the negative of the AV reward

from copy import deepcopy
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from flow.controllers import ContinuousRouter
from flow.controllers import IDMController
from flow.controllers import RLController
from flow.core.params import EnvParams
from flow.core.params import InitialConfig
from flow.core.params import NetParams
from flow.core.params import SumoParams
from flow.core.params import SumoCarFollowingParams
from flow.core.params import VehicleParams
from flow.networks.figure_eight import ADDITIONAL_NET_PARAMS
from flow.envs.multiagent import AdversarialAccelEnv
from flow.networks import FigureEightNetwork
from flow.utils.registry import make_create_env
from ray.tune.registry import register_env

# time horizon of a single rollout
HORIZON = 1500
# number of rollouts per training iteration
N_ROLLOUTS = 4
# number of parallel workers
N_CPUS = 2
# number of human-driven vehicles
N_HUMANS = 13
# number of automated vehicles
N_AVS = 1

# We place one autonomous vehicle and 13 human-driven vehicles in the network
vehicles = VehicleParams()
vehicles.add(
    veh_id='human',
    acceleration_controller=(IDMController, {
        'noise': 0.2
    }),
    routing_controller=(ContinuousRouter, {}),
    car_following_params=SumoCarFollowingParams(
        speed_mode='obey_safe_speed',
    ),
    num_vehicles=N_HUMANS)
vehicles.add(
    veh_id='rl',
    acceleration_controller=(RLController, {}),
    routing_controller=(ContinuousRouter, {}),
    car_following_params=SumoCarFollowingParams(
        speed_mode='obey_safe_speed',
    ),
    num_vehicles=N_AVS)

flow_params = dict(
    # name of the experiment
    exp_tag='adversarial_figure_eight',

    # name of the flow environment the experiment is running on
    env_name=AdversarialAccelEnv,

    # name of the network class the experiment is running on
    network=FigureEightNetwork,

    # simulator that is used by the experiment
    simulator='traci',

    # sumo-related parameters (see flow.core.params.SumoParams)
    sim=SumoParams(
        sim_step=0.1,
        render=False,
    ),

    # environment related parameters (see flow.core.params.EnvParams)
    env=EnvParams(
        horizon=HORIZON,
        additional_params={
            'target_velocity': 20,
            'max_accel': 3,
            'max_decel': 3,
            'perturb_weight': 0.03,
            'sort_vehicles': False
        },
    ),

    # network-related parameters (see flow.core.params.NetParams and the
    # network's documentation or ADDITIONAL_NET_PARAMS component)
    net=NetParams(
        additional_params=deepcopy(ADDITIONAL_NET_PARAMS),
    ),

    # vehicles to be placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    veh=vehicles,

    # parameters specifying the positioning of vehicles upon initialization/
    # reset (see flow.core.params.InitialConfig)
    initial=InitialConfig(),
)


create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space


def gen_policy():
    """Generate a policy in RLlib."""
    return PPOTFPolicy, obs_space, act_space, {}


# Set up an ensemble of policy graphs: one for the AV and one for the adversary
POLICY_GRAPHS = {'av': gen_policy(), 'adversary': gen_policy()}


def policy_mapping_fn(agent_id):
    """Map each agent to the policy that shares its id."""
    return agent_id
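
For orientation (not part of the diff): a config module like the one above is consumed by handing its registered env name, POLICY_GRAPHS, and policy_mapping_fn to RLlib. Below is a minimal launch sketch assuming the ray.tune API of this era and the repository root on PYTHONPATH; the trainer settings are illustrative assumptions, not values taken from this commit:

import ray
from ray import tune

from examples.exp_configs.rl.multiagent.adversarial_figure_eight import (
    HORIZON, N_CPUS, N_ROLLOUTS, POLICY_GRAPHS, env_name, policy_mapping_fn)

ray.init(num_cpus=N_CPUS)
tune.run(
    'PPO',
    config={
        'env': env_name,  # already registered via register_env on import
        'num_workers': N_CPUS - 1,
        'horizon': HORIZON,
        'train_batch_size': HORIZON * N_ROLLOUTS,  # assumed batch sizing
        'multiagent': {
            'policies': POLICY_GRAPHS,
            'policy_mapping_fn': policy_mapping_fn,
        },
    })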
122 changes: 122 additions & 0 deletions examples/exp_configs/rl/multiagent/lord_of_the_rings.py
@@ -0,0 +1,122 @@
"""Ring road example.
Creates a set of stabilizing the ring experiments to test if
more agents -> fewer needed batches
"""
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from flow.controllers import ContinuousRouter
from flow.controllers import IDMController
from flow.controllers import RLController
from flow.core.params import EnvParams
from flow.core.params import InitialConfig
from flow.core.params import NetParams
from flow.core.params import SumoParams
from flow.core.params import VehicleParams
from flow.envs.multiagent import MultiWaveAttenuationPOEnv
from flow.networks import MultiRingNetwork
from flow.utils.registry import make_create_env
from ray.tune.registry import register_env

# make sure (sample_batch_size * num_workers ~= train_batch_size)
# time horizon of a single rollout
HORIZON = 3000
# Number of rings
NUM_RINGS = 1
# number of rollouts per training iteration
N_ROLLOUTS = 20 # int(20/NUM_RINGS)
# number of parallel workers
N_CPUS = 2 # int(20/NUM_RINGS)

# For each ring, we place one autonomous vehicle and 21 human-driven vehicles
vehicles = VehicleParams()
for i in range(NUM_RINGS):
    vehicles.add(
        veh_id='human_{}'.format(i),
        acceleration_controller=(IDMController, {
            'noise': 0.2
        }),
        routing_controller=(ContinuousRouter, {}),
        num_vehicles=21)
    vehicles.add(
        veh_id='rl_{}'.format(i),
        acceleration_controller=(RLController, {}),
        routing_controller=(ContinuousRouter, {}),
        num_vehicles=1)

flow_params = dict(
    # name of the experiment
    exp_tag='lord_of_numrings{}'.format(NUM_RINGS),

    # name of the flow environment the experiment is running on
    env_name=MultiWaveAttenuationPOEnv,

    # name of the network class the experiment is running on
    network=MultiRingNetwork,

    # simulator that is used by the experiment
    simulator='traci',

    # sumo-related parameters (see flow.core.params.SumoParams)
    sim=SumoParams(
        sim_step=0.1,
        render=False,
    ),

    # environment related parameters (see flow.core.params.EnvParams)
    env=EnvParams(
        horizon=HORIZON,
        warmup_steps=750,
        additional_params={
            'max_accel': 1,
            'max_decel': 1,
            'ring_length': [230, 230],
            'target_velocity': 4
        },
    ),

    # network-related parameters (see flow.core.params.NetParams and the
    # network's documentation or ADDITIONAL_NET_PARAMS component)
    net=NetParams(
        additional_params={
            'length': 230,
            'lanes': 1,
            'speed_limit': 30,
            'resolution': 40,
            'num_rings': NUM_RINGS
        },
    ),

    # vehicles to be placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    veh=vehicles,

    # parameters specifying the positioning of vehicles upon initialization/
    # reset (see flow.core.params.InitialConfig)
    initial=InitialConfig(bunching=20.0, spacing='custom'),
)


create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space


def gen_policy():
    """Generate a policy in RLlib."""
    return PPOTFPolicy, obs_space, act_space, {}


# Set up a single policy graph shared by all automated vehicles
POLICY_GRAPHS = {'av': gen_policy()}


def policy_mapping_fn(_):
    """Map all agents to the shared 'av' policy."""
    return 'av'


POLICIES_TO_TRAIN = ['av']
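
A closing note on the shared policy above (illustrative, not in the diff): policy_mapping_fn sends every agent, regardless of which ring it drives on, to the single 'av' policy, and POLICIES_TO_TRAIN tells RLlib which policies receive gradient updates. A small sketch, where 'rl_0_0' and 'rl_1_0' are hypothetical agent ids (the real ones come from the environment at runtime):

# Every agent id maps to the one shared policy, so experience from all
# rings trains a single network.
for agent_id in ['rl_0_0', 'rl_1_0']:
    assert policy_mapping_fn(agent_id) == 'av'

# In the RLlib config this pairs with the standard 'multiagent' keys
# (assumed usage, mirroring the launch sketch after the first file):
#     'multiagent': {
#         'policies': POLICY_GRAPHS,
#         'policy_mapping_fn': policy_mapping_fn,
#         'policies_to_train': POLICIES_TO_TRAIN,
#     }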
