Merge pull request #300 from flow-project/remove_clipping
Remove clip_action from ray run scripts
- Ray's clip_actions is currently buggy for continuous action spaces, so we temporarily set it to False and clip actions on our own end instead (see the sketch below).
- Adds Ray 0.6.1 to environment.yml
- Upgrades test_visualizers to Ray 0.6.1
AboudyKreidieh authored Feb 5, 2019
2 parents 5b772e7 + f6a7728 commit 7b5a881
Showing 27 changed files with 325 additions and 85 deletions.
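
To make the workaround concrete, here is a minimal sketch of what clipping "on our own end" means, assuming a gym `Box` action space; the helper `clip_to_action_space` is illustrative, not Flow's actual implementation:

```python
import numpy as np
from gym.spaces import Box

# Trainer side: RLlib's own (buggy) clipping is switched off in the run config.
config = {'clip_actions': False}

# Environment side: clip incoming actions to the Box bounds ourselves.
def clip_to_action_space(action, action_space):
    """Clip an action to the low/high bounds of a gym.spaces.Box."""
    return np.clip(action, action_space.low, action_space.high)

action_space = Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
print(clip_to_action_space(np.array([3.0, -5.0]), action_space))  # [ 1. -1.]
```
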
17 changes: 0 additions & 17 deletions .travis.yml
@@ -22,23 +22,6 @@ before_install:
- conda env create -f environment.yml
- source activate flow

# Requirements for Ray
- pip install opencv-python
# Setup ray from https://s3-us-west-2.amazonaws.com/ray-wheels/
- pip install https://s3-us-west-1.amazonaws.com/flow.utils/ray-0.5.3-cp35-cp35m-manylinux1_x86_64.whl

# Copy over local rllib changes (testing branch)
- pushd $HOME/build/flow-project
- git clone https://github.com/flow-project/ray.git
- cd ray && git fetch && git checkout ray_merge && cd ..
- pushd ray
- RAY_SITE_DIR=`python -c "import ray; print(ray.__path__[0])"`
- rm -rf $RAY_SITE_DIR/rllib
- ls $RAY_SITE_DIR
- cp -r python/ray/rllib $RAY_SITE_DIR/rllib
- popd
- popd

# [sumo] dependencies and binaries
- pushd $HOME/build/flow-project
- ./flow/scripts/setup_sumo_ubuntu1404.sh
7 changes: 5 additions & 2 deletions docs/source/flow_setup.rst
@@ -190,9 +190,12 @@ optional but may be of use when trying to execute some of the examples files
located in Flow.
RLlib is one such library.
First visit <https://github.com/flow-project/ray/blob/master/doc/source/installation.rst> and
install the required packages. Do NOT `pip install ray`.
install the required packages.

The installation process for this library is as follows:
If you are not intending to develop RL algorithms or customize RLlib, you don't need to do anything;
Ray was installed when you created the conda environment.

If you are intending to modify Ray, the installation process for this library is as follows:

::

6 changes: 5 additions & 1 deletion environment.yml
@@ -10,7 +10,6 @@ dependencies:
- python-dateutil==2.7.3
- tensorflow==1.9.0
- cloudpickle==0.5.3
- opencv
- setuptools==39.1.0
- pip:
- gym==0.10.5
@@ -19,6 +18,11 @@ dependencies:
- cached_property
- joblib==0.10.3
- dill
- lz4
- ray==0.6.1
- setproctitle
- psutil
- opencv-python
- boto3==1.4.8
- redis~=2.10.6
- git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano
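
Since environment.yml now pins ray==0.6.1, a quick sanity check that the pinned version is the one importable (a hedged snippet, assuming it runs inside the activated `flow` conda environment):

```python
# Confirm the Ray version pinned in environment.yml is what got installed;
# assumes the `flow` conda environment is active.
import ray
assert ray.__version__ == '0.6.1', ray.__version__
```
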
1 change: 1 addition & 0 deletions examples/rllib/cooperative_merge.py
@@ -157,6 +157,7 @@ def setup_exps():
config['lambda'] = 0.97
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['horizon'] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/figure_eight.py
@@ -106,6 +106,7 @@ def setup_exps():
config['lambda'] = 0.97
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['horizon'] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/green_wave.py
@@ -179,6 +179,7 @@ def setup_exps():
config['lambda'] = 0.97
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['horizon'] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/multiagent_exps/multiagent_figure_eight.py
@@ -123,6 +123,7 @@ def setup_exps():
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['horizon'] = HORIZON
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['observation_filter'] = 'NoFilter'

# save the flow params for replay
@@ -116,6 +116,7 @@ def setup_exps():
config['model'].update({'fcnet_hiddens': [32, 32]})
config['lr'] = tune.grid_search([1e-5])
config['horizon'] = HORIZON
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['observation_filter'] = 'NoFilter'

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/stabilizing_highway.py
@@ -153,6 +153,7 @@ def setup_exps():
config["lambda"] = 0.97
config["kl_target"] = 0.02
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["horizon"] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/stabilizing_the_ring.py
@@ -106,6 +106,7 @@ def setup_exps():
config["lambda"] = 0.97
config["kl_target"] = 0.02
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["horizon"] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/velocity_bottleneck.py
@@ -177,6 +177,7 @@ def setup_exps():
config["lambda"] = 0.97
config["kl_target"] = 0.02
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["horizon"] = HORIZON

# save the flow params for replay
3 changes: 2 additions & 1 deletion flow/benchmarks/rllib/ars_runner.py
@@ -52,7 +52,7 @@
parser.add_argument(
'--num_cpus',
type=int,
default=6,
default=2,
help="The number of rollouts to average over.")

if __name__ == "__main__":
@@ -91,6 +91,7 @@
config["sgd_stepsize"] = 0.2
config["noise_stdev"] = 0.2
config['eval_prob'] = 0.05
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['observation_filter'] = "NoFilter"

# save the flow params for replay
3 changes: 2 additions & 1 deletion flow/benchmarks/rllib/es_runner.py
@@ -51,7 +51,7 @@
parser.add_argument(
'--num_cpus',
type=int,
default=6,
default=2,
help="The number of cpus to use.")

if __name__ == "__main__":
@@ -89,6 +89,7 @@
config["stepsize"] = 0.02

config["model"]["fcnet_hiddens"] = [100, 50, 25]
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["observation_filter"] = "NoFilter"
# save the flow params for replay
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
3 changes: 2 additions & 1 deletion flow/benchmarks/rllib/ppo_runner.py
@@ -51,7 +51,7 @@
parser.add_argument(
'--num_cpus',
type=int,
default=6,
default=2,
help="The number of cpus to use.")

if __name__ == "__main__":
@@ -97,6 +97,7 @@
config["lr"] = step_size
config["vf_clip_param"] = 1e6
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["model"]["fcnet_hiddens"] = [100, 50, 25]
config["observation_filter"] = "NoFilter"

31 changes: 23 additions & 8 deletions flow/utils/rllib.py
@@ -3,15 +3,17 @@
This includes: environment generation, serialization, and visualization.
"""
import dill
import json
from copy import deepcopy
import os

from flow.core.params import SumoLaneChangeParams, SumoCarFollowingParams, \
SumoParams, InitialConfig, EnvParams, NetParams, InFlows
from flow.core.params import TrafficLightParams
from flow.core.params import VehicleParams

from ray.cloudpickle import cloudpickle


class FlowParamsEncoder(json.JSONEncoder):
"""
@@ -139,14 +141,27 @@ def get_flow_params(config):

def get_rllib_config(path):
"""Return the data from the specified rllib configuration file."""
jsonfile = path + '/params.json' # params.json is the config file
jsondata = json.loads(open(jsonfile).read())
return jsondata
config_path = os.path.join(path, "params.json")
if not os.path.exists(config_path):
config_path = os.path.join(path, "../params.json")
if not os.path.exists(config_path):
raise ValueError(
"Could not find params.json in either the checkpoint dir or "
"its parent directory.")
with open(config_path) as f:
config = json.load(f)
return config


def get_rllib_pkl(path):
"""Return the data from the specified rllib configuration file."""
pklfile = path + '/params.pkl' # params.json is the config file
with open(pklfile, 'rb') as file:
pkldata = dill.load(file)
return pkldata
config_path = os.path.join(path, "params.pkl")
if not os.path.exists(config_path):
config_path = os.path.join(path, "../params.pkl")
if not os.path.exists(config_path):
raise ValueError(
"Could not find params.pkl in either the checkpoint dir or "
"its parent directory.")
with open(config_path, 'rb') as f:
config = cloudpickle.load(f)
return config
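
A hedged usage sketch of the two loaders above: with Ray's default layout, params.json and params.pkl sit in the run directory, one level above each checkpoint directory, which is why both functions fall back to the parent. The result path below is a placeholder:

```python
from flow.utils.rllib import get_rllib_config, get_rllib_pkl

# Placeholder path; point this at a real ray_results run or checkpoint dir.
result_dir = '/path/to/ray_results/experiment/run_0'

config = get_rllib_config(result_dir)   # parsed from params.json
pkl_config = get_rllib_pkl(result_dir)  # unpickled from params.pkl
print(config.get('horizon'), pkl_config.get('clip_actions'))
```
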
87 changes: 44 additions & 43 deletions flow/visualize/visualizer_rllib.py
@@ -14,6 +14,7 @@

import argparse
from datetime import datetime
import gym
import numpy as np
import os
import sys
@@ -24,10 +25,8 @@
except ImportError:
from ray.rllib.agents.registry import get_agent_class
from ray.tune.registry import register_env
from ray.rllib.models import ModelCatalog
import gym

import flow.envs
# import flow.envs
from flow.core.util import emission_to_csv
from flow.utils.registry import make_create_env
from flow.utils.rllib import get_flow_params
@@ -44,27 +43,10 @@
"""


class _RLlibPreprocessorWrapper(gym.ObservationWrapper):
"""Adapts a RLlib preprocessor for use as an observation wrapper."""

def __init__(self, env, preprocessor):
super(_RLlibPreprocessorWrapper, self).__init__(env)
self.preprocessor = preprocessor

from gym.spaces.box import Box
self.observation_space = Box(
-1.0, 1.0, preprocessor.shape, dtype=np.float32)

def observation(self, observation):
return self.preprocessor.transform(observation)


def visualizer_rllib(args):
result_dir = args.result_dir if args.result_dir[-1] != '/' \
else args.result_dir[:-1]

# config = get_rllib_config(result_dir + '/..')
# pkl = get_rllib_pkl(result_dir + '/..')
config = get_rllib_config(result_dir)
# TODO(ev) backwards compatibility hack
try:
@@ -152,18 +134,16 @@ def visualizer_rllib(args):

# check if the environment is a single or multiagent environment, and
# get the right address accordingly
single_agent_envs = [env for env in dir(flow.envs)
if not env.startswith('__')]
# single_agent_envs = [env for env in dir(flow.envs)
# if not env.startswith('__')]

if flow_params['env_name'] in single_agent_envs:
env_loc = 'flow.envs'
else:
env_loc = 'flow.multiagent_envs'
# if flow_params['env_name'] in single_agent_envs:
# env_loc = 'flow.envs'
# else:
# env_loc = 'flow.multiagent_envs'

# Start the environment with the gui turned on and a path for the
# emission file
module = __import__(env_loc, fromlist=[flow_params['env_name']])
env_class = getattr(module, flow_params['env_name'])
env_params = flow_params['env']
env_params.restart_instance = False
if args.evaluate:
@@ -180,14 +160,10 @@
checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
agent.restore(checkpoint)

_env = env_class(
env_params=env_params,
sim_params=sim_params,
scenario=scenario,
simulator=flow_params['simulator']
)
_prep = ModelCatalog.get_preprocessor(_env, options={})
env = _RLlibPreprocessorWrapper(_env, _prep)
if hasattr(agent, "local_evaluator") and os.environ["TEST_FLAG"] != 'True':
env = agent.local_evaluator.env
else:
env = gym.make(env_name)

if multiagent:
rets = {}
@@ -197,6 +173,26 @@
rets[key] = []
else:
rets = []

if config['model']['use_lstm']:
use_lstm = True
if multiagent:
state_init = {}
# map the agent id to its policy
policy_map_fn = config['multiagent']['policy_mapping_fn'].func
size = config['model']['lstm_cell_size']
for key in config['multiagent']['policy_graphs'].keys():
state_init[key] = [np.zeros(size, np.float32),
np.zeros(size, np.float32)
]
else:
state_init = [
np.zeros(config['model']['lstm_cell_size'], np.float32),
np.zeros(config['model']['lstm_cell_size'], np.float32)
]
else:
use_lstm = False

final_outflows = []
mean_speed = []
for i in range(args.num_rollouts):
@@ -212,8 +208,14 @@
if multiagent:
action = {}
for agent_id in state.keys():
action[agent_id] = agent.compute_action(
state[agent_id], policy_id=policy_map_fn(agent_id))
if use_lstm:
action[agent_id], state_init[agent_id], logits = \
agent.compute_action(
state[agent_id], state=state_init[agent_id],
policy_id=policy_map_fn(agent_id))
else:
action[agent_id] = agent.compute_action(
state[agent_id], policy_id=policy_map_fn(agent_id))
else:
action = agent.compute_action(state)
state, reward, done, _ = env.step(action)
@@ -319,12 +321,11 @@ def create_parser():
help='Specifies whether to use the \'evaluate\' reward '
'for the environment.')
parser.add_argument(
'--render-mode',
'--render_mode',
type=str,
default='sumo-gui',
help='Pick the render mode. Options include sumo-web3d, '
'rgbd, sumo-gui, and no-render. For more details'
'see the visualization tutorial.')
default='sumo_gui',
help='Pick the render mode. Options include sumo_web3d, '
'rgbd and sumo_gui')
parser.add_argument(
'--save_render',
action='store_true',
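
For reference, a minimal sketch of the recurrent-policy rollout pattern introduced above: when the trained model is an LSTM, compute_action must be fed the previous RNN state and returns the new one alongside the action. The helper below is illustrative (it assumes a restored RLlib agent and a gym-style env), not the full visualizer logic:

```python
import numpy as np

def rollout_once(agent, env, lstm_cell_size=None):
    """Roll out one episode; pass lstm_cell_size for LSTM policies."""
    use_lstm = lstm_cell_size is not None
    if use_lstm:
        # Recurrent policies carry explicit RNN state, initialized to zeros.
        state = [np.zeros(lstm_cell_size, np.float32),
                 np.zeros(lstm_cell_size, np.float32)]
    obs = env.reset()
    done, total_reward = False, 0.0
    while not done:
        if use_lstm:
            # As in the diff: compute_action on a recurrent policy returns
            # (action, new_rnn_state, logits).
            action, state, _ = agent.compute_action(obs, state=state)
        else:
            action = agent.compute_action(obs)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
    return total_reward
```
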
Binary file modified tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1
