Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
16 changed files
with
1,427 additions
and
128 deletions.
There are no files selected for viewing
127 changes: 127 additions & 0 deletions
127
examples/exp_configs/rl/multiagent/adversarial_figure_eight.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,127 @@ | ||
"""Example of a multi-agent environment containing a figure eight. | ||
This example consists of one autonomous vehicle and an adversary that is | ||
allowed to perturb the accelerations of figure eight. | ||
""" | ||
|
||
# WARNING: Expected total reward is zero as adversary reward is | ||
# the negative of the AV reward | ||
|
||
from copy import deepcopy | ||
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy | ||
from flow.controllers import ContinuousRouter | ||
from flow.controllers import IDMController | ||
from flow.controllers import RLController | ||
from flow.core.params import EnvParams | ||
from flow.core.params import InitialConfig | ||
from flow.core.params import NetParams | ||
from flow.core.params import SumoParams | ||
from flow.core.params import SumoCarFollowingParams | ||
from flow.core.params import VehicleParams | ||
from flow.networks.figure_eight import ADDITIONAL_NET_PARAMS | ||
from flow.envs.multiagent import AdversarialAccelEnv | ||
from flow.networks import FigureEightNetwork | ||
from flow.utils.registry import make_create_env | ||
from ray.tune.registry import register_env | ||
|
||
# time horizon of a single rollout
HORIZON = 1500
# number of rollouts per training iteration
N_ROLLOUTS = 4
# number of parallel workers
N_CPUS = 2
# number of human-driven vehicles
N_HUMANS = 13
# number of automated vehicles
N_AVS = 1

# We place one autonomous vehicle and 13 human-driven vehicles in the network
vehicles = VehicleParams()
vehicles.add(
    veh_id='human',
    # IDM car-following controller with gaussian acceleration noise (std 0.2)
    acceleration_controller=(IDMController, {
        'noise': 0.2
    }),
    # keep vehicles circulating around the figure eight indefinitely
    routing_controller=(ContinuousRouter, {}),
    car_following_params=SumoCarFollowingParams(
        # defer to SUMO's safe-speed checks to prevent crashes during training
        speed_mode='obey_safe_speed',
    ),
    num_vehicles=N_HUMANS)
vehicles.add(
    veh_id='rl',
    # accelerations for this vehicle are supplied by the RL policy
    acceleration_controller=(RLController, {}),
    routing_controller=(ContinuousRouter, {}),
    car_following_params=SumoCarFollowingParams(
        speed_mode='obey_safe_speed',
    ),
    num_vehicles=N_AVS)
|
||
flow_params = dict(
    # name of the experiment
    exp_tag='adversarial_figure_eight',

    # name of the flow environment the experiment is running on
    env_name=AdversarialAccelEnv,

    # name of the network class the experiment is running on
    network=FigureEightNetwork,

    # simulator that is used by the experiment
    simulator='traci',

    # sumo-related parameters (see flow.core.params.SumoParams)
    sim=SumoParams(
        sim_step=0.1,
        render=False,
    ),

    # environment related parameters (see flow.core.params.EnvParams)
    env=EnvParams(
        horizon=HORIZON,
        additional_params={
            'target_velocity': 20,
            'max_accel': 3,
            'max_decel': 3,
            # NOTE(review): presumably scales how strongly the adversary's
            # actions perturb the AV accelerations — confirm against
            # AdversarialAccelEnv
            'perturb_weight': 0.03,
            'sort_vehicles': False
        },
    ),

    # network-related parameters (see flow.core.params.NetParams and the
    # network's documentation or ADDITIONAL_NET_PARAMS component)
    net=NetParams(
        # deepcopy so later mutation of these params cannot leak back into
        # the shared module-level ADDITIONAL_NET_PARAMS dict
        additional_params=deepcopy(ADDITIONAL_NET_PARAMS),
    ),

    # vehicles to be placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    veh=vehicles,

    # parameters specifying the positioning of vehicles upon initialization/
    # reset (see flow.core.params.InitialConfig)
    initial=InitialConfig(),
)
|
||
|
||
create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

# instantiate one environment up front only to read off the observation and
# action spaces that every policy in the ensemble below is constructed with
test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space
|
||
|
||
def gen_policy():
    """Build the (policy_cls, obs_space, act_space, config) tuple RLlib expects.

    Every call hands back a fresh, empty config dict so policies do not share
    mutable configuration state.
    """
    config = {}
    return (PPOTFPolicy, obs_space, act_space, config)
|
||
|
||
# Setup PG with an ensemble of `num_policies` different policy graphs
# one independent policy for the AV and one for the adversary; the
# policy_mapping_fn below pairs each agent id with the policy of the same name
POLICY_GRAPHS = {'av': gen_policy(), 'adversary': gen_policy()}
|
||
|
||
def policy_mapping_fn(agent_id):
    """Assign each agent the policy that shares its name (identity mapping)."""
    policy_id = agent_id
    return policy_id
122 changes: 122 additions & 0 deletions
122
examples/exp_configs/rl/multiagent/lord_of_the_rings.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
"""Ring road example. | ||
Creates a set of stabilizing the ring experiments to test if | ||
more agents -> fewer needed batches | ||
""" | ||
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy | ||
from flow.controllers import ContinuousRouter | ||
from flow.controllers import IDMController | ||
from flow.controllers import RLController | ||
from flow.core.params import EnvParams | ||
from flow.core.params import InitialConfig | ||
from flow.core.params import NetParams | ||
from flow.core.params import SumoParams | ||
from flow.core.params import VehicleParams | ||
from flow.envs.multiagent import MultiWaveAttenuationPOEnv | ||
from flow.networks import MultiRingNetwork | ||
from flow.utils.registry import make_create_env | ||
from ray.tune.registry import register_env | ||
|
||
# make sure (sample_batch_size * num_workers ~= train_batch_size)
# time horizon of a single rollout
HORIZON = 3000
# Number of rings
NUM_RINGS = 1
# number of rollouts per training iteration
N_ROLLOUTS = 20  # int(20/NUM_RINGS)
# number of parallel workers
N_CPUS = 2  # int(20/NUM_RINGS)

# We place one autonomous vehicle and 21 human-driven vehicles in the network
vehicles = VehicleParams()
# one platoon of 21 humans plus 1 RL vehicle per ring; ids carry the ring
# index suffix so agents on different rings remain distinguishable
for i in range(NUM_RINGS):
    vehicles.add(
        veh_id='human_{}'.format(i),
        # IDM car-following controller with gaussian acceleration noise
        acceleration_controller=(IDMController, {
            'noise': 0.2
        }),
        routing_controller=(ContinuousRouter, {}),
        num_vehicles=21)
    vehicles.add(
        veh_id='rl_{}'.format(i),
        # accelerations for this vehicle are supplied by the RL policy
        acceleration_controller=(RLController, {}),
        routing_controller=(ContinuousRouter, {}),
        num_vehicles=1)
|
||
flow_params = dict(
    # name of the experiment
    exp_tag='lord_of_numrings{}'.format(NUM_RINGS),

    # name of the flow environment the experiment is running on
    env_name=MultiWaveAttenuationPOEnv,

    # name of the network class the experiment is running on
    network=MultiRingNetwork,

    # simulator that is used by the experiment
    simulator='traci',

    # sumo-related parameters (see flow.core.params.SumoParams)
    sim=SumoParams(
        sim_step=0.1,
        render=False,
    ),

    # environment related parameters (see flow.core.params.EnvParams)
    env=EnvParams(
        horizon=HORIZON,
        # let the simulation run uncontrolled for a while before the agents
        # begin acting
        warmup_steps=750,
        additional_params={
            'max_accel': 1,
            'max_decel': 1,
            # NOTE(review): looks like [min, max] bounds for the ring length
            # (here fixed at 230) — confirm in MultiWaveAttenuationPOEnv
            'ring_length': [230, 230],
            'target_velocity': 4
        },
    ),

    # network-related parameters (see flow.core.params.NetParams and the
    # network's documentation or ADDITIONAL_NET_PARAMS component)
    net=NetParams(
        additional_params={
            'length': 230,
            'lanes': 1,
            'speed_limit': 30,
            'resolution': 40,
            'num_rings': NUM_RINGS
        }, ),

    # vehicles to be placed in the network at the start of a rollout (see
    # flow.core.params.VehicleParams)
    veh=vehicles,

    # parameters specifying the positioning of vehicles upon initialization/
    # reset (see flow.core.params.InitialConfig)
    initial=InitialConfig(bunching=20.0, spacing='custom'),
)
|
||
|
||
create_env, env_name = make_create_env(params=flow_params, version=0)

# Register as rllib env
register_env(env_name, create_env)

# instantiate one environment up front only to read off the observation and
# action spaces that the shared policy below is constructed with
test_env = create_env()
obs_space = test_env.observation_space
act_space = test_env.action_space
|
||
|
||
def gen_policy():
    """Build the (policy_cls, obs_space, act_space, config) tuple RLlib expects.

    Every call hands back a fresh, empty config dict so policies do not share
    mutable configuration state.
    """
    config = {}
    return (PPOTFPolicy, obs_space, act_space, config)
|
||
|
||
# Setup PG with an ensemble of `num_policies` different policy graphs
# a single shared policy ('av') controls every RL vehicle on every ring
POLICY_GRAPHS = {'av': gen_policy()}
|
||
|
||
def policy_mapping_fn(_):
    """Route every agent, regardless of id, to the single shared 'av' policy."""
    shared_policy = 'av'
    return shared_policy
|
||
|
||
POLICIES_TO_TRAIN = ['av'] |
Oops, something went wrong.