Upgrade ray to 0.7.3 (#732)
* upgrade ray from master

* pull kparvate/upgrade_ray_2

* update benchmark scripts to only run ppo, and set 14 cpus

* length includes internal lengths, added non_internal_length

* flake

* make Aboudy's changes

* fix

* checkout origin/master in autoscale.yaml

* added updated pkl results for rllib

* updated obs space of multiagent highway, fixed multiagent features in visualizer_rllib

* upgraded numpy, setuptools

* bug fixes
kanaadp authored and AboudyKreidieh committed Sep 23, 2019
1 parent b49d918 commit 26ffa1c
Showing 35 changed files with 151 additions and 128 deletions.
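
The change that recurs across the RLlib examples below is Ray's 0.6 -> 0.7 policy API migration. A minimal sketch of the post-upgrade pattern, pieced together from the hunks that follow (the helper name, spaces, and CPU count are illustrative placeholders, not part of this commit):

import ray
from ray import tune
# moved from ray.rllib.agents.ppo.ppo_policy_graph.PPOPolicyGraph
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy

def make_multiagent_config(config, obs_space, act_space):
    def gen_policy():
        # a policy spec is now (policy class, obs space, act space, extra config)
        return PPOTFPolicy, obs_space, act_space, {}

    config.update({
        'multiagent': {
            'policies': {'av': gen_policy()},  # key renamed from 'policy_graphs'
            'policy_mapping_fn': tune.function(lambda agent_id: 'av'),
        }
    })
    return config

ray.init(num_cpus=2)  # the 'redirect_output' kwarg is gone in 0.7.x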
10 changes: 3 additions & 7 deletions environment.yml
@@ -2,15 +2,14 @@ name: flow

 dependencies:
 - python==3.6.8
-- numpy==1.14.0
 - scipy==1.1.0
 - lxml==4.2.4
 - six==1.11.0
 - path.py
 - python-dateutil==2.7.3
 - tensorflow==1.9.0
-- cloudpickle==0.5.3
-- setuptools==39.1.0
+- cloudpickle==1.2.1
+- setuptools==41.0.0
 - pip:
   - gym==0.14.0
   - pyprind==2.11.2
@@ -20,12 +19,9 @@ dependencies:
   - matplotlib==3.0.0
   - dill
   - lz4
-  - ray==0.6.1
+  - ray==0.7.3
   - setproctitle
   - psutil
   - opencv-python
   - boto3==1.4.8
   - redis~=2.10.6
-  - git+https://github.com/Theano/Theano.git@adfe319ce6b781083d8dc3200fb4481b00853791#egg=Theano
-  - git+https://github.com/neocxi/Lasagne.git@484866cf8b38d878e92d521be445968531646bb8#egg=Lasagne
-  - git+https://github.com/plotly/plotly.py.git@2594076e29584ede2d09f2aa40a8a195b3f3fc66#egg=plotly
2 changes: 1 addition & 1 deletion examples/rllib/figure_eight.py
@@ -135,7 +135,7 @@ def setup_exps():

 if __name__ == '__main__':
     alg_run, gym_name, config = setup_exps()
-    ray.init(num_cpus=N_CPUS + 1, redirect_output=False)
+    ray.init(num_cpus=N_CPUS + 1)
     trials = run_experiments({
         flow_params['exp_tag']: {
             'run': alg_run,
6 changes: 3 additions & 3 deletions examples/rllib/multiagent_exps/multiagent_figure_eight.py
@@ -15,7 +15,7 @@
     from ray.rllib.agents.agent import get_agent_class
 except ImportError:
     from ray.rllib.agents.registry import get_agent_class
-from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
+from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
 from ray import tune
 from ray.tune.registry import register_env
 from ray.tune import run_experiments
@@ -153,7 +153,7 @@ def setup_exps():
     act_space = test_env.action_space

     def gen_policy():
-        return (PPOPolicyGraph, obs_space, act_space, {})
+        return PPOTFPolicy, obs_space, act_space, {}

     # Setup PG with an ensemble of `num_policies` different policy graphs
     policy_graphs = {'av': gen_policy(), 'adversary': gen_policy()}
@@ -163,7 +163,7 @@ def policy_mapping_fn(agent_id):

     config.update({
         'multiagent': {
-            'policy_graphs': policy_graphs,
+            'policies': policy_graphs,
             'policy_mapping_fn': tune.function(policy_mapping_fn)
         }
     })
6 changes: 3 additions & 3 deletions examples/rllib/multiagent_exps/multiagent_highway.py
@@ -9,7 +9,7 @@
     from ray.rllib.agents.agent import get_agent_class
 except ImportError:
     from ray.rllib.agents.registry import get_agent_class
-from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
+from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
 from ray import tune
 from ray.tune.registry import register_env
 from ray.tune import run_experiments
@@ -205,7 +205,7 @@ def setup_exps(flow_params):

     # multiagent configuration
     temp_env = create_env()
-    policy_graphs = {'av': (PPOPolicyGraph,
+    policy_graphs = {'av': (PPOTFPolicy,
                             temp_env.observation_space,
                             temp_env.action_space,
                             {})}
@@ -215,7 +215,7 @@ def policy_mapping_fn(_):

     config.update({
         'multiagent': {
-            'policy_graphs': policy_graphs,
+            'policies': policy_graphs,
             'policy_mapping_fn': tune.function(policy_mapping_fn),
             'policies_to_train': ['av']
         }
@@ -11,7 +11,7 @@
     from ray.rllib.agents.agent import get_agent_class
 except ImportError:
     from ray.rllib.agents.registry import get_agent_class
-from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
+from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
 from ray import tune
 from ray.tune.registry import register_env
 from ray.tune import run_experiments
@@ -146,7 +146,7 @@ def setup_exps():
     act_space = test_env.action_space

     def gen_policy():
-        return (PPOPolicyGraph, obs_space, act_space, {})
+        return PPOTFPolicy, obs_space, act_space, {}

     # Setup PG with an ensemble of `num_policies` different policy graphs
     policy_graphs = {'av': gen_policy()}
@@ -156,7 +156,7 @@ def policy_mapping_fn(_):

     config.update({
         'multiagent': {
-            'policy_graphs': policy_graphs,
+            'policies': policy_graphs,
             'policy_mapping_fn': tune.function(policy_mapping_fn),
             'policies_to_train': ['av']
         }
@@ -8,7 +8,7 @@
     from ray.rllib.agents.agent import get_agent_class
 except ImportError:
     from ray.rllib.agents.registry import get_agent_class
-from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph
+from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
 from ray import tune
 from ray.tune.registry import register_env
 from ray.tune import run_experiments
@@ -203,7 +203,7 @@ def setup_exps_PPO(flow_params):
     act_space = test_env.action_space

     def gen_policy():
-        return (PPOPolicyGraph, obs_space, act_space, {})
+        return PPOTFPolicy, obs_space, act_space, {}

     # Setup PG with a single policy graph for all agents
     policy_graphs = {'av': gen_policy()}
@@ -213,7 +213,7 @@ def policy_mapping_fn(_):

     config.update({
         'multiagent': {
-            'policy_graphs': policy_graphs,
+            'policies': policy_graphs,
             'policy_mapping_fn': tune.function(policy_mapping_fn),
             'policies_to_train': ['av']
         }
2 changes: 1 addition & 1 deletion examples/rllib/stabilizing_highway.py
@@ -181,7 +181,7 @@ def setup_exps():

 if __name__ == "__main__":
     alg_run, gym_name, config = setup_exps()
-    ray.init(num_cpus=N_CPUS + 1, redirect_output=False)
+    ray.init(num_cpus=N_CPUS + 1)
     trials = run_experiments({
         flow_params["exp_tag"]: {
             "run": alg_run,
2 changes: 1 addition & 1 deletion examples/rllib/stabilizing_the_ring.py
@@ -138,7 +138,7 @@ def setup_exps():

 if __name__ == "__main__":
     alg_run, gym_name, config = setup_exps()
-    ray.init(num_cpus=N_CPUS + 1, redirect_output=False)
+    ray.init(num_cpus=N_CPUS + 1)
     trials = run_experiments({
         flow_params["exp_tag"]: {
             "run": alg_run,
2 changes: 1 addition & 1 deletion examples/rllib/traffic_light_grid.py
@@ -281,7 +281,7 @@ def setup_exps(use_inflows=False):

 if __name__ == '__main__':
     alg_run, gym_name, config = setup_exps()
-    ray.init(num_cpus=N_CPUS + 1, redirect_output=False)
+    ray.init(num_cpus=N_CPUS + 1)
     trials = run_experiments({
         flow_params['exp_tag']: {
             'run': alg_run,
2 changes: 1 addition & 1 deletion examples/rllib/velocity_bottleneck.py
@@ -203,7 +203,7 @@ def setup_exps():

 if __name__ == "__main__":
     alg_run, gym_name, config = setup_exps()
-    ray.init(num_cpus=N_CPUS + 1, redirect_output=False)
+    ray.init(num_cpus=N_CPUS + 1)
     trials = run_experiments({
         flow_params["exp_tag"]: {
             "run": alg_run,
2 changes: 1 addition & 1 deletion flow/benchmarks/rllib/ars_runner.py
@@ -79,7 +79,7 @@
 alg_run = "ARS"

 # initialize a ray instance
-ray.init(redirect_output=True)
+ray.init()

 agent_cls = get_agent_class(alg_run)
 config = agent_cls._default_config.copy()
2 changes: 1 addition & 1 deletion flow/benchmarks/rllib/es_runner.py
@@ -78,7 +78,7 @@
 create_env, env_name = make_create_env(params=flow_params, version=0)

 # initialize a ray instance
-ray.init(redirect_output=True)
+ray.init()

 alg_run = "ES"

2 changes: 1 addition & 1 deletion flow/benchmarks/rllib/ppo_runner.py
@@ -76,7 +76,7 @@
 create_env, env_name = make_create_env(params=flow_params, version=0)

 # initialize a ray instance
-ray.init(redirect_output=True)
+ray.init()

 alg_run = "PPO"

9 changes: 5 additions & 4 deletions flow/benchmarks/run_all_benchmarks.sh
@@ -13,14 +13,15 @@ cd "$parent_path"

 dt=$(date '+%Y_%m_%d_%H%M');
 echo $dt
-for run_script in rllib/*_runner.py; do
+i=0
+for run_script in rllib/ppo_runner.py; do
     declare alg=`echo ${run_script} | cut -d'/' -f 2 | cut -d'_' -f 1`
     for benchmark in "${benchmarks[@]}"; do
-
+        i=$((i+1))
         echo "====================================================================="
         echo "Training ${benchmark} with ${alg}"
-        echo "ray exec ../../scripts/benchmark_autoscale.yaml \"python ./flow/flow/benchmarks/${run_script} --upload_dir=\"flow-benchmark.results/${dt}/\" --benchmark_name=${benchmark}\" --start --stop --cluster-name=all_benchmark_${benchmark}_${alg}_$dt --tmux"
+        echo "ray exec ../../scripts/benchmark_autoscale.yaml \"python ./flow/flow/benchmarks/${run_script} --upload_dir=\"flow-benchmark.results/${dt}/\" --benchmark_name=${benchmark} --num_cpus 14\" --start --stop --cluster-name=all_benchmark_${benchmark}_${alg}_$dt --tmux"
         echo "====================================================================="
-        ray exec ../../scripts/benchmark_autoscale.yaml "python ./flow/flow/benchmarks/${run_script} --upload_dir=\"flow-benchmark.results/${dt}/\" --benchmark_name=${benchmark}" --start --stop --cluster-name=all_benchmark_${benchmark}_${alg}_$dt --tmux
+        ray exec ../../scripts/benchmark_autoscale.yaml "python ./flow/flow/benchmarks/${run_script} --upload_dir=\"flow-benchmark.results/${dt}/\" --benchmark_name=${benchmark} --num_cpus 14" --start --stop --cluster-name=all_benchmark_${benchmark}_${alg}_$dt --tmux
     done
 done
4 changes: 2 additions & 2 deletions flow/core/kernel/network/base.py
@@ -344,7 +344,7 @@ def gen_even_start_pos(self, initial_config, num_vehicles):

         # ensures that you are in an acceptable edge
         while pos[0] not in available_edges:
-            x = (x + self.edge_length(pos[0])) % self.length()
+            x = (x + self.edge_length(pos[0])) % self.non_internal_length()
             pos = self.get_edge(x)

         # ensure that in variable lane settings vehicles always start a
@@ -366,7 +366,7 @@ def gen_even_start_pos(self, initial_config, num_vehicles):
             if car_count == num_vehicles:
                 break

-            x = (x + increment + VEHICLE_LENGTH + min_gap) % self.length()
+            x = (x + increment + VEHICLE_LENGTH + min_gap) % self.non_internal_length()

             # add a perturbation to each vehicle, while not letting the vehicle
             # leave its current edge
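The modulo in both hunks above keeps generated start positions on non-internal road; a worked example with assumed numbers (not from this commit):

# assumed: self.non_internal_length() == 200.0, candidate position x == 230.0
# 230.0 % 200.0 == 30.0, so the next vehicle wraps back to 30 m from the origin
# rather than landing beyond the drivable, non-internal portion of the network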
13 changes: 11 additions & 2 deletions flow/core/kernel/network/traci.py
@@ -79,7 +79,8 @@ def __init__(self, master_kernel, sim_params):
         self._edge_list = None
         self._junction_list = None
         self.__max_speed = None
-        self.__length = None
+        self.__length = None  # total length
+        self.__non_internal_length = None  # total length of non-internal edges
         self.rts = None
         self.cfg = None

@@ -158,7 +159,7 @@ def generate_network(self, network):

         # length of the network, or the portion of the network in
         # which cars are meant to be distributed
-        self.__length = sum(
+        self.__non_internal_length = sum(
             self.edge_length(edge_id) for edge_id in self.get_edge_list()
         )

@@ -190,6 +191,10 @@

         self.total_edgestarts_dict = dict(self.total_edgestarts)

+        self.__length = sum(
+            self._edges[edge_id]['length'] for edge_id in self._edges
+        )
+
         if self.network.routes is None:
             print("No routes specified, defaulting to single edge routes.")
             self.network.routes = {edge: [edge] for edge in self._edge_list}
@@ -278,6 +283,10 @@ def length(self):
         """See parent class."""
         return self.__length

+    def non_internal_length(self):
+        """See parent class."""
+        return self.__non_internal_length
+
     def speed_limit(self, edge_id):
         """See parent class."""
         try:
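With the bookkeeping above, length() now sums every edge in self._edges (internal junction edges included), while non_internal_length() keeps the old total over get_edge_list() only. A hypothetical contrast (the kernel instance net and the edge lengths are assumed):

# two 100 m regular edges joined through a junction with a 5 m internal edge
net.non_internal_length()  # -> 200.0: regular edges only, used for start positions
net.length()               # -> 205.0: includes the internal junction edge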
2 changes: 1 addition & 1 deletion flow/envs/multiagent/highway.py
@@ -68,7 +68,7 @@ def __init__(self, env_params, sim_params, network, simulator='traci'):
     @property
     def observation_space(self):
         """See class definition."""
-        return Box(low=0, high=1, shape=(5, ), dtype=np.float32)
+        return Box(-float('inf'), float('inf'), shape=(5,), dtype=np.float32)

     @property
     def action_space(self):
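The widened bounds matter because the five observed features are not confined to [0, 1] (relative speeds, for instance, can be negative). A quick check of the two spaces, using the gym version pinned above:

import numpy as np
from gym.spaces import Box

old_space = Box(low=0, high=1, shape=(5,), dtype=np.float32)
new_space = Box(-float('inf'), float('inf'), shape=(5,), dtype=np.float32)
obs = np.array([-0.3, 1.2, 0.5, 0.0, 0.9], dtype=np.float32)
print(old_space.contains(obs))  # False: entries below 0 or above 1 are rejected
print(new_space.contains(obs))  # True: any finite float32 vector is accepted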
15 changes: 5 additions & 10 deletions flow/visualize/visualizer_rllib.py
@@ -55,16 +55,12 @@ def visualizer_rllib(args):
         else args.result_dir[:-1]

     config = get_rllib_config(result_dir)
-    # TODO(ev) backwards compatibility hack
-    try:
-        pkl = get_rllib_pkl(result_dir)
-    except Exception:
-        pass

     # check if we have a multiagent environment but in a
     # backwards compatible way
-    if config.get('multiagent', {}).get('policy_graphs', {}):
+    if config.get('multiagent', {}).get('policies', None):
         multiagent = True
+        pkl = get_rllib_pkl(result_dir)
         config['multiagent'] = pkl['multiagent']
     else:
         multiagent = False
@@ -167,7 +163,7 @@ def visualizer_rllib(args):
         rets = {}
         # map the agent id to its policy
         policy_map_fn = config['multiagent']['policy_mapping_fn'].func
-        for key in config['multiagent']['policy_graphs'].keys():
+        for key in config['multiagent']['policies'].keys():
             rets[key] = []
     else:
         rets = []
@@ -179,10 +175,9 @@ def visualizer_rllib(args):
         # map the agent id to its policy
         policy_map_fn = config['multiagent']['policy_mapping_fn'].func
         size = config['model']['lstm_cell_size']
-        for key in config['multiagent']['policy_graphs'].keys():
+        for key in config['multiagent']['policies'].keys():
             state_init[key] = [np.zeros(size, np.float32),
-                               np.zeros(size, np.float32)
-                               ]
+                               np.zeros(size, np.float32)]
     else:
         state_init = [
             np.zeros(config['model']['lstm_cell_size'], np.float32),
5 changes: 3 additions & 2 deletions requirements.txt
@@ -1,5 +1,5 @@
 gym==0.14.0
-numpy==1.14.0
+numpy==1.16.0
 scipy==1.1.0
 lxml==4.2.4
 pyprind==2.11.2
@@ -14,7 +14,7 @@ pyglet==1.3.2
 matplotlib==3.0.0
 imutils==0.5.1
 numpydoc
-ray==0.6.1
+ray==0.7.3
 opencv-python
 dill
 lz4
@@ -23,3 +23,4 @@ psutil
 opencv-python
 boto3==1.4.8
 redis~=2.10.6
+pandas==0.24.2
