Multiagent environments (#818)
* renamed MultiAgentAccelEnv -> AdversarialAccelEnv, along with its dependents

* added MultiAgentAccelPOEnv

* bug fix

* bug fix

* added MultiAgentWaveAttenuationPOEnv

* added MultiAgentMergePOEnv

* added tests

* test to observed

* added reset to wave attenuation env

* test to observed in wave attenuation env

* added figure eight example

* added multiagent merge example

* renamed multiagent_ring -> lord_of_the_rings

* added an additional test

* added multiagent ring example

* test not being hit

* added time debugger

* bug fix to done mask (see the done-mask sketch after this list)

* bug associated with warmup steps
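
For context on the last two fixes: RLlib's multiagent API expects each step to return a done dict keyed by agent id plus a special '__all__' key, and the episode terminates only once '__all__' is true. A minimal sketch of that convention follows (illustrative only; compute_dones and its arguments are hypothetical helpers, not the code patched in this commit):

def compute_dones(rl_ids, crash, steps_after_warmup, horizon):
    """Build an RLlib-style multiagent done mask (hypothetical helper)."""
    # Each agent is marked done when the simulation crashes ...
    done = {veh_id: crash for veh_id in rl_ids}
    # ... and '__all__' ends the episode for every agent at once. Counting
    # steps from the end of the warmup period keeps warmup steps from
    # eating into the horizon.
    done['__all__'] = crash or steps_after_warmup >= horizon
    return done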
AboudyKreidieh committed Feb 19, 2020
1 parent 7dc2096 commit 3e8fc0c
Showing 16 changed files with 1,427 additions and 128 deletions.
127 changes: 127 additions & 0 deletions examples/exp_configs/rl/multiagent/adversarial_figure_eight.py
@@ -0,0 +1,127 @@
"""Example of a multi-agent environment containing a figure eight.
This example consists of one autonomous vehicle and an adversary that is
allowed to perturb the accelerations of figure eight.
"""

# WARNING: Expected total reward is zero as adversary reward is
# the negative of the AV reward

from copy import deepcopy
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from flow.controllers import ContinuousRouter
from flow.controllers import IDMController
from flow.controllers import RLController
from flow.core.params import EnvParams
from flow.core.params import InitialConfig
from flow.core.params import NetParams
from flow.core.params import SumoParams
from flow.core.params import SumoCarFollowingParams
from flow.core.params import VehicleParams
from flow.networks.figure_eight import ADDITIONAL_NET_PARAMS
from flow.envs.multiagent import AdversarialAccelEnv
from flow.networks import FigureEightNetwork
from flow.utils.registry import make_create_env
from ray.tune.registry import register_env

# time horizon of a single rollout
HORIZON = 1500
# number of rollouts per training iteration
N_ROLLOUTS = 4
# number of parallel workers
N_CPUS = 2
# number of human-driven vehicles
N_HUMANS = 13
# number of automated vehicles
N_AVS = 1

# We place one autonomous vehicle and 13 human-driven vehicles in the network
vehicles = VehicleParams()
vehicles.add(
    veh_id='human',
    acceleration_controller=(IDMController, {
        'noise': 0.2
    }),
    routing_controller=(ContinuousRouter, {}),
    car_following_params=SumoCarFollowingParams(
        speed_mode='obey_safe_speed',
    ),
    num_vehicles=N_HUMANS)
vehicles.add(
    veh_id='rl',
    acceleration_controller=(RLController, {}),
    routing_controller=(ContinuousRouter, {}),
    car_following_params=SumoCarFollowingParams(
        speed_mode='obey_safe_speed',
    ),
    num_vehicles=N_AVS)

flow_params = dict(
    # name of the experiment
    exp_tag='adversarial_figure_eight',

    # name of the flow environment the experiment is running on
    env_name=AdversarialAccelEnv,

    # name of the network class the experiment is running on
    network=FigureEightNetwork,

    # simulator that is used by the experiment
    simulator='traci',

    # sumo-related parameters (see flow.core.params.SumoParams)
    sim=SumoParams(
        sim_step=0.1,
        render=False,
    ),

    # environment related parameters (see flow.core.params.EnvParams)
    env=EnvParams(
        horizon=HORIZON,
        additional_params={
            'target_velocity': 20,
            'max_accel': 3,
            'max_decel': 3,
            'perturb_weight': 0.03,
            'sort_vehicles': False
        },
    ),

    # network-related parameters (see flow.core.params.NetParams and the
    # network's documentation or ADDITIONAL_NET_PARAMS component)
    net=NetParams(
        additional_params=deepcopy(ADDITIONAL_NET_PARAMS),
    ),

    # vehicles to be placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    veh=vehicles,

    # parameters specifying the positioning of vehicles upon initialization/
    # reset (see flow.core.params.InitialConfig)
    initial=InitialConfig(),
)


create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space


def gen_policy():
    """Generate a policy in RLlib."""
    return PPOTFPolicy, obs_space, act_space, {}


# Set up an ensemble of policy graphs: one for the AV and one for the adversary
POLICY_GRAPHS = {'av': gen_policy(), 'adversary': gen_policy()}


def policy_mapping_fn(agent_id):
    """Map each agent to the policy that shares its id."""
    return agent_id
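
For orientation (not part of the diff): a config module like the one above is consumed by handing its registered env name, POLICY_GRAPHS, and policy_mapping_fn to RLlib. Below is a minimal launch sketch assuming the ray.tune API of this era and the repository root on PYTHONPATH; the trainer settings are illustrative assumptions, not values taken from this commit:

import ray
from ray import tune

from examples.exp_configs.rl.multiagent.adversarial_figure_eight import (
    HORIZON, N_CPUS, N_ROLLOUTS, POLICY_GRAPHS, env_name, policy_mapping_fn)

ray.init(num_cpus=N_CPUS)
tune.run(
    'PPO',
    config={
        'env': env_name,  # already registered via register_env on import
        'num_workers': N_CPUS - 1,
        'horizon': HORIZON,
        'train_batch_size': HORIZON * N_ROLLOUTS,  # assumed batch sizing
        'multiagent': {
            'policies': POLICY_GRAPHS,
            'policy_mapping_fn': policy_mapping_fn,
        },
    })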
122 changes: 122 additions & 0 deletions examples/exp_configs/rl/multiagent/lord_of_the_rings.py
@@ -0,0 +1,122 @@
"""Ring road example.
Creates a set of stabilizing the ring experiments to test if
more agents -> fewer needed batches
"""
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from flow.controllers import ContinuousRouter
from flow.controllers import IDMController
from flow.controllers import RLController
from flow.core.params import EnvParams
from flow.core.params import InitialConfig
from flow.core.params import NetParams
from flow.core.params import SumoParams
from flow.core.params import VehicleParams
from flow.envs.multiagent import MultiWaveAttenuationPOEnv
from flow.networks import MultiRingNetwork
from flow.utils.registry import make_create_env
from ray.tune.registry import register_env

# make sure (sample_batch_size * num_workers ~= train_batch_size)
# time horizon of a single rollout
HORIZON = 3000
# Number of rings
NUM_RINGS = 1
# number of rollouts per training iteration
N_ROLLOUTS = 20 # int(20/NUM_RINGS)
# number of parallel workers
N_CPUS = 2 # int(20/NUM_RINGS)

# For each ring, we place one autonomous vehicle and 21 human-driven vehicles
vehicles = VehicleParams()
for i in range(NUM_RINGS):
    vehicles.add(
        veh_id='human_{}'.format(i),
        acceleration_controller=(IDMController, {
            'noise': 0.2
        }),
        routing_controller=(ContinuousRouter, {}),
        num_vehicles=21)
    vehicles.add(
        veh_id='rl_{}'.format(i),
        acceleration_controller=(RLController, {}),
        routing_controller=(ContinuousRouter, {}),
        num_vehicles=1)

flow_params = dict(
    # name of the experiment
    exp_tag='lord_of_numrings{}'.format(NUM_RINGS),

    # name of the flow environment the experiment is running on
    env_name=MultiWaveAttenuationPOEnv,

    # name of the network class the experiment is running on
    network=MultiRingNetwork,

    # simulator that is used by the experiment
    simulator='traci',

    # sumo-related parameters (see flow.core.params.SumoParams)
    sim=SumoParams(
        sim_step=0.1,
        render=False,
    ),

    # environment related parameters (see flow.core.params.EnvParams)
    env=EnvParams(
        horizon=HORIZON,
        warmup_steps=750,
        additional_params={
            'max_accel': 1,
            'max_decel': 1,
            'ring_length': [230, 230],
            'target_velocity': 4
        },
    ),

    # network-related parameters (see flow.core.params.NetParams and the
    # network's documentation or ADDITIONAL_NET_PARAMS component)
    net=NetParams(
        additional_params={
            'length': 230,
            'lanes': 1,
            'speed_limit': 30,
            'resolution': 40,
            'num_rings': NUM_RINGS
        },
    ),

    # vehicles to be placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    veh=vehicles,

    # parameters specifying the positioning of vehicles upon initialization/
    # reset (see flow.core.params.InitialConfig)
    initial=InitialConfig(bunching=20.0, spacing='custom'),
)


create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space


def gen_policy():
    """Generate a policy in RLlib."""
    return PPOTFPolicy, obs_space, act_space, {}


# Set up a single policy graph shared by all automated vehicles
POLICY_GRAPHS = {'av': gen_policy()}


def policy_mapping_fn(_):
    """Map all agents to the shared 'av' policy."""
    return 'av'


POLICIES_TO_TRAIN = ['av']
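
A closing note on the shared policy above (illustrative, not in the diff): policy_mapping_fn sends every agent, regardless of which ring it drives on, to the single 'av' policy, and POLICIES_TO_TRAIN tells RLlib which policies receive gradient updates. A small sketch, where 'rl_0_0' and 'rl_1_0' are hypothetical agent ids (the real ones come from the environment at runtime):

# Every agent id maps to the one shared policy, so experience from all
# rings trains a single network.
for agent_id in ['rl_0_0', 'rl_1_0']:
    assert policy_mapping_fn(agent_id) == 'av'

# In the RLlib config this pairs with the standard 'multiagent' keys
# (assumed usage, mirroring the launch sketch after the first file):
#     'multiagent': {
#         'policies': POLICY_GRAPHS,
#         'policy_mapping_fn': policy_mapping_fn,
#         'policies_to_train': POLICIES_TO_TRAIN,
#     }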
