Merge pull request #300 from flow-project/remove_clipping
Remove clip_action from ray run scripts
- Ray's clip_actions is currently buggy for continuous action spaces, so we temporarily set it to False and clip actions on our own end instead (see the sketch below).
- Adds Ray 0.6.1 to environment.yml
- Upgrades test_visualizers to Ray 0.6.1
AboudyKreidieh authored Feb 5, 2019
2 parents 5b772e7 + f6a7728 commit 7b5a881
Showing 27 changed files with 325 additions and 85 deletions.
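
To make the workaround concrete, here is a minimal sketch of what clipping "on our own end" means, assuming a gym `Box` action space; the helper `clip_to_action_space` is illustrative, not Flow's actual implementation:

```python
import numpy as np
from gym.spaces import Box

# Trainer side: RLlib's own (buggy) clipping is switched off in the run config.
config = {'clip_actions': False}

# Environment side: clip incoming actions to the Box bounds ourselves.
def clip_to_action_space(action, action_space):
    """Clip an action to the low/high bounds of a gym.spaces.Box."""
    return np.clip(action, action_space.low, action_space.high)

action_space = Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
print(clip_to_action_space(np.array([3.0, -5.0]), action_space))  # [ 1. -1.]
```
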
17 changes: 0 additions & 17 deletions .travis.yml
@@ -22,23 +22,6 @@ before_install:
- conda env create -f environment.yml
- source activate flow

# Requirements for Ray
- pip install opencv-python
# Setup ray from https://s3-us-west-2.amazonaws.com/ray-wheels/
- pip install https://s3-us-west-1.amazonaws.com/flow.utils/ray-0.5.3-cp35-cp35m-manylinux1_x86_64.whl

# Copy over local rllib changes (testing branch)
- pushd $HOME/build/flow-project
- git clone https://github.com/flow-project/ray.git
- cd ray && git fetch && git checkout ray_merge && cd ..
- pushd ray
- RAY_SITE_DIR=`python -c "import ray; print(ray.__path__[0])"`
- rm -rf $RAY_SITE_DIR/rllib
- ls $RAY_SITE_DIR
- cp -r python/ray/rllib $RAY_SITE_DIR/rllib
- popd
- popd

# [sumo] dependencies and binaries
- pushd $HOME/build/flow-project
- ./flow/scripts/setup_sumo_ubuntu1404.sh
7 changes: 5 additions & 2 deletions docs/source/flow_setup.rst
@@ -190,9 +190,12 @@ optional but may be of use when trying to execute some of the examples files
located in Flow.
RLlib is one such library.
First visit <https://github.com/flow-project/ray/blob/master/doc/source/installation.rst> and
install the required packages. Do NOT `pip install ray`.
install the required packages.

The installation process for this library is as follows:
If you are not intending to develop RL algorithms or customize RLlib, you don't need to do anything;
Ray was installed when you created the conda environment.

If you are intending to modify Ray, the installation process for this library is as follows:

::

6 changes: 5 additions & 1 deletion environment.yml
@@ -10,7 +10,6 @@ dependencies:
- python-dateutil==2.7.3
- tensorflow==1.9.0
- cloudpickle==0.5.3
- opencv
- setuptools==39.1.0
- pip:
- gym==0.10.5
@@ -19,6 +18,11 @@ dependencies:
- cached_property
- joblib==0.10.3
- dill
- lz4
- ray==0.6.1
- setproctitle
- psutil
- opencv-python
- boto3==1.4.8
- redis~=2.10.6
- git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano
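
Since environment.yml now pins ray==0.6.1, a quick sanity check that the pinned version is the one importable (a hedged snippet, assuming it runs inside the activated `flow` conda environment):

```python
# Confirm the Ray version pinned in environment.yml is what got installed;
# assumes the `flow` conda environment is active.
import ray
assert ray.__version__ == '0.6.1', ray.__version__
```
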
1 change: 1 addition & 0 deletions examples/rllib/cooperative_merge.py
@@ -157,6 +157,7 @@ def setup_exps():
config['lambda'] = 0.97
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['horizon'] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/figure_eight.py
@@ -106,6 +106,7 @@ def setup_exps():
config['lambda'] = 0.97
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['horizon'] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/green_wave.py
@@ -179,6 +179,7 @@ def setup_exps():
config['lambda'] = 0.97
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['horizon'] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/multiagent_exps/multiagent_figure_eight.py
@@ -123,6 +123,7 @@ def setup_exps():
config['kl_target'] = 0.02
config['num_sgd_iter'] = 10
config['horizon'] = HORIZON
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['observation_filter'] = 'NoFilter'

# save the flow params for replay
@@ -116,6 +116,7 @@ def setup_exps():
config['model'].update({'fcnet_hiddens': [32, 32]})
config['lr'] = tune.grid_search([1e-5])
config['horizon'] = HORIZON
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['observation_filter'] = 'NoFilter'

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/stabilizing_highway.py
@@ -153,6 +153,7 @@ def setup_exps():
config["lambda"] = 0.97
config["kl_target"] = 0.02
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["horizon"] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/stabilizing_the_ring.py
@@ -106,6 +106,7 @@ def setup_exps():
config["lambda"] = 0.97
config["kl_target"] = 0.02
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["horizon"] = HORIZON

# save the flow params for replay
1 change: 1 addition & 0 deletions examples/rllib/velocity_bottleneck.py
@@ -177,6 +177,7 @@ def setup_exps():
config["lambda"] = 0.97
config["kl_target"] = 0.02
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["horizon"] = HORIZON

# save the flow params for replay
3 changes: 2 additions & 1 deletion flow/benchmarks/rllib/ars_runner.py
@@ -52,7 +52,7 @@
parser.add_argument(
'--num_cpus',
type=int,
default=6,
default=2,
help="The number of rollouts to average over.")

if __name__ == "__main__":
@@ -91,6 +91,7 @@
config["sgd_stepsize"] = 0.2
config["noise_stdev"] = 0.2
config['eval_prob'] = 0.05
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config['observation_filter'] = "NoFilter"

# save the flow params for replay
3 changes: 2 additions & 1 deletion flow/benchmarks/rllib/es_runner.py
@@ -51,7 +51,7 @@
parser.add_argument(
'--num_cpus',
type=int,
default=6,
default=2,
help="The number of cpus to use.")

if __name__ == "__main__":
@@ -89,6 +89,7 @@
config["stepsize"] = 0.02

config["model"]["fcnet_hiddens"] = [100, 50, 25]
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["observation_filter"] = "NoFilter"
# save the flow params for replay
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
3 changes: 2 additions & 1 deletion flow/benchmarks/rllib/ppo_runner.py
@@ -51,7 +51,7 @@
parser.add_argument(
'--num_cpus',
type=int,
default=6,
default=2,
help="The number of cpus to use.")

if __name__ == "__main__":
@@ -97,6 +97,7 @@
config["lr"] = step_size
config["vf_clip_param"] = 1e6
config["num_sgd_iter"] = 10
config['clip_actions'] = False # FIXME(ev) temporary ray bug
config["model"]["fcnet_hiddens"] = [100, 50, 25]
config["observation_filter"] = "NoFilter"

31 changes: 23 additions & 8 deletions flow/utils/rllib.py
@@ -3,15 +3,17 @@
This includes: environment generation, serialization, and visualization.
"""
import dill
import json
from copy import deepcopy
import os

from flow.core.params import SumoLaneChangeParams, SumoCarFollowingParams, \
SumoParams, InitialConfig, EnvParams, NetParams, InFlows
from flow.core.params import TrafficLightParams
from flow.core.params import VehicleParams

from ray.cloudpickle import cloudpickle


class FlowParamsEncoder(json.JSONEncoder):
"""
@@ -139,14 +141,27 @@ def get_flow_params(config):

def get_rllib_config(path):
"""Return the data from the specified rllib configuration file."""
jsonfile = path + '/params.json' # params.json is the config file
jsondata = json.loads(open(jsonfile).read())
return jsondata
config_path = os.path.join(path, "params.json")
if not os.path.exists(config_path):
config_path = os.path.join(path, "../params.json")
if not os.path.exists(config_path):
raise ValueError(
"Could not find params.json in either the checkpoint dir or "
"its parent directory.")
with open(config_path) as f:
config = json.load(f)
return config


def get_rllib_pkl(path):
"""Return the data from the specified rllib configuration file."""
pklfile = path + '/params.pkl' # params.json is the config file
with open(pklfile, 'rb') as file:
pkldata = dill.load(file)
return pkldata
config_path = os.path.join(path, "params.pkl")
if not os.path.exists(config_path):
config_path = os.path.join(path, "../params.pkl")
if not os.path.exists(config_path):
raise ValueError(
"Could not find params.pkl in either the checkpoint dir or "
"its parent directory.")
with open(config_path, 'rb') as f:
config = cloudpickle.load(f)
return config
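
A hedged usage sketch of the two loaders above: with Ray's default layout, params.json and params.pkl sit in the run directory, one level above each checkpoint directory, which is why both functions fall back to the parent. The result path below is a placeholder:

```python
from flow.utils.rllib import get_rllib_config, get_rllib_pkl

# Placeholder path; point this at a real ray_results run or checkpoint dir.
result_dir = '/path/to/ray_results/experiment/run_0'

config = get_rllib_config(result_dir)   # parsed from params.json
pkl_config = get_rllib_pkl(result_dir)  # unpickled from params.pkl
print(config.get('horizon'), pkl_config.get('clip_actions'))
```
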
87 changes: 44 additions & 43 deletions flow/visualize/visualizer_rllib.py
@@ -14,6 +14,7 @@

import argparse
from datetime import datetime
import gym
import numpy as np
import os
import sys
@@ -24,10 +25,8 @@
except ImportError:
from ray.rllib.agents.registry import get_agent_class
from ray.tune.registry import register_env
from ray.rllib.models import ModelCatalog
import gym

import flow.envs
# import flow.envs
from flow.core.util import emission_to_csv
from flow.utils.registry import make_create_env
from flow.utils.rllib import get_flow_params
@@ -44,27 +43,10 @@
"""


class _RLlibPreprocessorWrapper(gym.ObservationWrapper):
"""Adapts a RLlib preprocessor for use as an observation wrapper."""

def __init__(self, env, preprocessor):
super(_RLlibPreprocessorWrapper, self).__init__(env)
self.preprocessor = preprocessor

from gym.spaces.box import Box
self.observation_space = Box(
-1.0, 1.0, preprocessor.shape, dtype=np.float32)

def observation(self, observation):
return self.preprocessor.transform(observation)


def visualizer_rllib(args):
result_dir = args.result_dir if args.result_dir[-1] != '/' \
else args.result_dir[:-1]

# config = get_rllib_config(result_dir + '/..')
# pkl = get_rllib_pkl(result_dir + '/..')
config = get_rllib_config(result_dir)
# TODO(ev) backwards compatibility hack
try:
@@ -152,18 +134,16 @@ def visualizer_rllib(args):

# check if the environment is a single or multiagent environment, and
# get the right address accordingly
single_agent_envs = [env for env in dir(flow.envs)
if not env.startswith('__')]
# single_agent_envs = [env for env in dir(flow.envs)
# if not env.startswith('__')]

if flow_params['env_name'] in single_agent_envs:
env_loc = 'flow.envs'
else:
env_loc = 'flow.multiagent_envs'
# if flow_params['env_name'] in single_agent_envs:
# env_loc = 'flow.envs'
# else:
# env_loc = 'flow.multiagent_envs'

# Start the environment with the gui turned on and a path for the
# emission file
module = __import__(env_loc, fromlist=[flow_params['env_name']])
env_class = getattr(module, flow_params['env_name'])
env_params = flow_params['env']
env_params.restart_instance = False
if args.evaluate:
@@ -180,14 +160,10 @@
checkpoint = checkpoint + '/checkpoint-' + args.checkpoint_num
agent.restore(checkpoint)

_env = env_class(
env_params=env_params,
sim_params=sim_params,
scenario=scenario,
simulator=flow_params['simulator']
)
_prep = ModelCatalog.get_preprocessor(_env, options={})
env = _RLlibPreprocessorWrapper(_env, _prep)
if hasattr(agent, "local_evaluator") and os.environ["TEST_FLAG"] != 'True':
env = agent.local_evaluator.env
else:
env = gym.make(env_name)

if multiagent:
rets = {}
@@ -197,6 +173,26 @@
rets[key] = []
else:
rets = []

if config['model']['use_lstm']:
use_lstm = True
if multiagent:
state_init = {}
# map the agent id to its policy
policy_map_fn = config['multiagent']['policy_mapping_fn'].func
size = config['model']['lstm_cell_size']
for key in config['multiagent']['policy_graphs'].keys():
state_init[key] = [np.zeros(size, np.float32),
np.zeros(size, np.float32)
]
else:
state_init = [
np.zeros(config['model']['lstm_cell_size'], np.float32),
np.zeros(config['model']['lstm_cell_size'], np.float32)
]
else:
use_lstm = False

final_outflows = []
mean_speed = []
for i in range(args.num_rollouts):
@@ -212,8 +208,14 @@
if multiagent:
action = {}
for agent_id in state.keys():
action[agent_id] = agent.compute_action(
state[agent_id], policy_id=policy_map_fn(agent_id))
if use_lstm:
action[agent_id], state_init[agent_id], logits = \
agent.compute_action(
state[agent_id], state=state_init[agent_id],
policy_id=policy_map_fn(agent_id))
else:
action[agent_id] = agent.compute_action(
state[agent_id], policy_id=policy_map_fn(agent_id))
else:
action = agent.compute_action(state)
state, reward, done, _ = env.step(action)
@@ -319,12 +321,11 @@ def create_parser():
help='Specifies whether to use the \'evaluate\' reward '
'for the environment.')
parser.add_argument(
'--render-mode',
'--render_mode',
type=str,
default='sumo-gui',
help='Pick the render mode. Options include sumo-web3d, '
'rgbd, sumo-gui, and no-render. For more details'
'see the visualization tutorial.')
default='sumo_gui',
help='Pick the render mode. Options include sumo_web3d, '
'rgbd and sumo_gui')
parser.add_argument(
'--save_render',
action='store_true',
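
For reference, a minimal sketch of the recurrent-policy rollout pattern introduced above: when the trained model is an LSTM, compute_action must be fed the previous RNN state and returns the new one alongside the action. The helper below is illustrative (it assumes a restored RLlib agent and a gym-style env), not the full visualizer logic:

```python
import numpy as np

def rollout_once(agent, env, lstm_cell_size=None):
    """Roll out one episode; pass lstm_cell_size for LSTM policies."""
    use_lstm = lstm_cell_size is not None
    if use_lstm:
        # Recurrent policies carry explicit RNN state, initialized to zeros.
        state = [np.zeros(lstm_cell_size, np.float32),
                 np.zeros(lstm_cell_size, np.float32)]
    obs = env.reset()
    done, total_reward = False, 0.0
    while not done:
        if use_lstm:
            # As in the diff: compute_action on a recurrent policy returns
            # (action, new_rnn_state, logits).
            action, state, _ = agent.compute_action(obs, state=state)
        else:
            action = agent.compute_action(obs)
        obs, reward, done, _ = env.step(action)
        total_reward += reward
    return total_reward
```
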
Binary file modified tests/data/rllib_data/multi_agent/checkpoint_1/checkpoint-1
